From: Eric Anholt Date: Tue, 1 May 2018 19:24:48 +0000 (-0700) Subject: v3d: Rename the driver files from "vc5" to "v3d". X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8c47ebbd232704ab048eab2572e2b2a44f38957a;p=mesa.git v3d: Rename the driver files from "vc5" to "v3d". --- diff --git a/configure.ac b/configure.ac index 81ce001e3c8..a73471b7b45 100644 --- a/configure.ac +++ b/configure.ac @@ -3037,8 +3037,8 @@ AC_CONFIG_FILES([Makefile src/gallium/drivers/tegra/Makefile src/gallium/drivers/etnaviv/Makefile src/gallium/drivers/imx/Makefile + src/gallium/drivers/v3d/Makefile src/gallium/drivers/vc4/Makefile - src/gallium/drivers/vc5/Makefile src/gallium/drivers/virgl/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile @@ -3085,8 +3085,8 @@ AC_CONFIG_FILES([Makefile src/gallium/winsys/sw/wrapper/Makefile src/gallium/winsys/sw/xlib/Makefile src/gallium/winsys/tegra/drm/Makefile + src/gallium/winsys/v3d/drm/Makefile src/gallium/winsys/vc4/drm/Makefile - src/gallium/winsys/vc5/drm/Makefile src/gallium/winsys/virgl/drm/Makefile src/gallium/winsys/virgl/vtest/Makefile src/gbm/Makefile diff --git a/src/broadcom/Makefile.am b/src/broadcom/Makefile.am index 49267de73b5..4faa7721544 100644 --- a/src/broadcom/Makefile.am +++ b/src/broadcom/Makefile.am @@ -60,6 +60,6 @@ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) include Makefile.genxml.am include Makefile.cle.am -include Makefile.vc5.am +include Makefile.v3d.am CLEANFILES += $(BUILT_SOURCES) diff --git a/src/broadcom/Makefile.v3d.am b/src/broadcom/Makefile.v3d.am new file mode 100644 index 00000000000..97ef2d7455e --- /dev/null +++ b/src/broadcom/Makefile.v3d.am @@ -0,0 +1,32 @@ +noinst_LTLIBRARIES += libbroadcom.la +noinst_LTLIBRARIES += libbroadcom_v33.la +noinst_LTLIBRARIES += libbroadcom_v41.la +noinst_LTLIBRARIES += libbroadcom_v42.la + +if USE_V3D_SIMULATOR +AM_CFLAGS += $(V3D_SIMULATOR_CFLAGS) +libbroadcom_la_LDFLAGS = $(V3D_SIMULATOR_LIBS) +endif + 
+libbroadcom_v33_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) +libbroadcom_v33_la_CFLAGS = -DV3D_VERSION=33 + +libbroadcom_v41_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) +libbroadcom_v41_la_CFLAGS = -DV3D_VERSION=41 + +libbroadcom_v42_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) +libbroadcom_v42_la_CFLAGS = -DV3D_VERSION=42 + +libbroadcom_la_SOURCES = $(BROADCOM_FILES) + +check_PROGRAMS += \ + qpu/tests/qpu_disasm \ + $(NULL) + +LDADD = \ + libbroadcom.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(NULL) + +TESTS += $(check_PROGRAMS) diff --git a/src/broadcom/Makefile.vc5.am b/src/broadcom/Makefile.vc5.am deleted file mode 100644 index 97ef2d7455e..00000000000 --- a/src/broadcom/Makefile.vc5.am +++ /dev/null @@ -1,32 +0,0 @@ -noinst_LTLIBRARIES += libbroadcom.la -noinst_LTLIBRARIES += libbroadcom_v33.la -noinst_LTLIBRARIES += libbroadcom_v41.la -noinst_LTLIBRARIES += libbroadcom_v42.la - -if USE_V3D_SIMULATOR -AM_CFLAGS += $(V3D_SIMULATOR_CFLAGS) -libbroadcom_la_LDFLAGS = $(V3D_SIMULATOR_LIBS) -endif - -libbroadcom_v33_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) -libbroadcom_v33_la_CFLAGS = -DV3D_VERSION=33 - -libbroadcom_v41_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) -libbroadcom_v41_la_CFLAGS = -DV3D_VERSION=41 - -libbroadcom_v42_la_SOURCES = $(BROADCOM_PER_VERSION_SOURCES) -libbroadcom_v42_la_CFLAGS = -DV3D_VERSION=42 - -libbroadcom_la_SOURCES = $(BROADCOM_FILES) - -check_PROGRAMS += \ - qpu/tests/qpu_disasm \ - $(NULL) - -LDADD = \ - libbroadcom.la \ - $(top_builddir)/src/compiler/nir/libnir.la \ - $(top_builddir)/src/util/libmesautil.la \ - $(NULL) - -TESTS += $(check_PROGRAMS) diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index 351bbf4fa60..e75c1866832 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -90,7 +90,7 @@ endif ## v3d if HAVE_GALLIUM_V3D -SUBDIRS += drivers/vc5 winsys/vc5/drm +SUBDIRS += drivers/v3d winsys/v3d/drm endif ## virgl diff --git 
a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h index c494848888d..7eefa6e42ec 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -311,7 +311,7 @@ pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config) #endif #ifdef GALLIUM_V3D -#include "vc5/drm/vc5_drm_public.h" +#include "v3d/drm/v3d_drm_public.h" struct pipe_screen * pipe_v3d_create_screen(int fd, const struct pipe_screen_config *config) diff --git a/src/gallium/drivers/v3d/.editorconfig b/src/gallium/drivers/v3d/.editorconfig new file mode 100644 index 00000000000..5a9f3c041a4 --- /dev/null +++ b/src/gallium/drivers/v3d/.editorconfig @@ -0,0 +1,3 @@ +[*.{c,h,cpp}] +indent_style = space +indent_size = 8 diff --git a/src/gallium/drivers/v3d/Automake.inc b/src/gallium/drivers/v3d/Automake.inc new file mode 100644 index 00000000000..7cf8ae7cd8b --- /dev/null +++ b/src/gallium/drivers/v3d/Automake.inc @@ -0,0 +1,14 @@ +if HAVE_GALLIUM_V3D + +TARGET_DRIVERS += v3d +TARGET_CPPFLAGS += -DGALLIUM_V3D +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/v3d/drm/libv3ddrm.la \ + $(top_builddir)/src/gallium/drivers/v3d/libv3d.la \ + $(top_builddir)/src/broadcom/libbroadcom.la + +if !HAVE_GALLIUM_VC4 +TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la +endif + +endif diff --git a/src/gallium/drivers/v3d/Makefile.am b/src/gallium/drivers/v3d/Makefile.am new file mode 100644 index 00000000000..2b4c364c24e --- /dev/null +++ b/src/gallium/drivers/v3d/Makefile.am @@ -0,0 +1,56 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to 
permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/broadcom \ + $(LIBDRM_CFLAGS) \ + $(V3D_SIMULATOR_CFLAGS) \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $() + +noinst_LTLIBRARIES = \ + libv3d.la \ + libv3d_v33.la \ + libv3d_v41.la \ + $() + +libv3d_v33_la_SOURCES = $(V3D_PER_VERSION_SOURCES) +libv3d_v33_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=33 + +libv3d_v41_la_SOURCES = $(V3D_PER_VERSION_SOURCES) +libv3d_v41_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=41 + +libv3d_la_SOURCES = $(C_SOURCES) + +libv3d_la_LDFLAGS = \ + $(V3D_SIMULATOR_LIBS) \ + $(NULL) +libv3d_la_LIBADD = \ + libv3d_v33.la \ + libv3d_v41.la \ + $() + +EXTRA_DIST = meson.build diff --git a/src/gallium/drivers/v3d/Makefile.sources b/src/gallium/drivers/v3d/Makefile.sources new file mode 100644 index 00000000000..c81ccb42013 --- /dev/null +++ b/src/gallium/drivers/v3d/Makefile.sources @@ -0,0 +1,36 @@ +C_SOURCES := \ + v3d_blit.c \ + v3d_bufmgr.c \ + v3d_bufmgr.h \ + v3d_cl.c \ + v3d_cl.h \ + v3d_context.c \ + v3d_context.h \ + v3d_fence.c \ + v3d_formats.c \ + v3d_format_table.h \ + v3d_job.c \ + v3d_program.c \ + v3d_query.c \ + v3d_resource.c \ + 
v3d_resource.h \ + v3d_screen.c \ + v3d_screen.h \ + v3d_simulator.c \ + v3d_simulator_wrapper.cpp \ + v3d_simulator_wrapper.h \ + v3d_tiling.c \ + v3d_tiling.h \ + v3d_uniforms.c \ + $() + +V3D_PER_VERSION_SOURCES = \ + v3dx_context.h \ + v3dx_draw.c \ + v3dx_emit.c \ + v3dx_format_table.c \ + v3dx_job.c \ + v3dx_rcl.c \ + v3dx_simulator.c \ + v3dx_state.c \ + $() diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build new file mode 100644 index 00000000000..38021515eda --- /dev/null +++ b/src/gallium/drivers/v3d/meson.build @@ -0,0 +1,96 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +files_libv3d = files( + 'v3d_blit.c', + 'v3d_bufmgr.c', + 'v3d_bufmgr.h', + 'v3d_cl.c', + 'v3d_cl.h', + 'v3d_context.c', + 'v3d_context.h', + 'v3d_fence.c', + 'v3d_formats.c', + 'v3d_job.c', + 'v3d_program.c', + 'v3d_query.c', + 'v3d_resource.c', + 'v3d_resource.h', + 'v3d_screen.c', + 'v3d_screen.h', + 'v3d_simulator.c', + 'v3d_simulator_wrapper.cpp', + 'v3d_tiling.c', + 'v3d_tiling.h', + 'v3d_uniforms.c', +) + +files_per_version = files( + 'v3dx_draw.c', + 'v3dx_emit.c', + 'v3dx_format_table.c', + 'v3dx_job.c', + 'v3dx_rcl.c', + 'v3dx_simulator.c', + 'v3dx_state.c', +) + +v3dv3_c_args = [] +dep_v3dv3 = dependency('v3dv3') +if dep_v3dv3.found() + v3dv3_c_args = '-DUSE_V3D_SIMULATOR' +endif + +v3d_versions = ['33', '41'] + +per_version_libs = [] +foreach ver : v3d_versions + per_version_libs += static_library( + 'v3d-v' + ver, + [files_per_version, v3d_xml_pack, nir_opcodes_h, nir_builder_opcodes_h], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_gallium_drivers, inc_drm_uapi, + ], + c_args : [c_vis_args, v3dv3_c_args, '-DV3D_VERSION=' + ver], + cpp_args : [cpp_vis_args], + dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind], +) + +endforeach + +libv3d = static_library( + 'v3d', + [files_libv3d, v3d_xml_pack], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_gallium_drivers, inc_drm_uapi, + ], + c_args : [c_vis_args, v3dv3_c_args], + cpp_args : [cpp_vis_args, v3dv3_c_args], + dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers], + link_with: per_version_libs, +) + +driver_v3d = declare_dependency( + compile_args : '-DGALLIUM_V3D', + link_with : [libv3d, libv3dwinsys, libbroadcom_cle, libbroadcom_v3d], + dependencies : idep_nir, +) diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c new file mode 100644 index 00000000000..7c67d4561ba --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_blit.c @@ -0,0 +1,302 @@ +/* 
+ * Copyright © 2015-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "util/u_format.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "v3d_context.h" + +#if 0 +static struct pipe_surface * +vc5_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return pctx->create_surface(pctx, prsc, &tmpl); +} + +static bool +is_tile_unaligned(unsigned size, unsigned tile_size) +{ + return size & (tile_size - 1); +} + +static bool +vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + bool msaa = (info->src.resource->nr_samples > 1 || + info->dst.resource->nr_samples > 1); + int tile_width = msaa ? 32 : 64; + int tile_height = msaa ? 32 : 64; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + return false; + + if (info->scissor_enable) + return false; + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != info->src.box.x || + info->dst.box.y != info->src.box.y || + info->dst.box.width != info->src.box.width || + info->dst.box.height != info->src.box.height) { + return false; + } + + int dst_surface_width = u_minify(info->dst.resource->width0, + info->dst.level); + int dst_surface_height = u_minify(info->dst.resource->height0, + info->dst.level); + if (is_tile_unaligned(info->dst.box.x, tile_width) || + is_tile_unaligned(info->dst.box.y, tile_height) || + (is_tile_unaligned(info->dst.box.width, tile_width) && + info->dst.box.x + info->dst.box.width != dst_surface_width) || + (is_tile_unaligned(info->dst.box.height, tile_height) && + info->dst.box.y + info->dst.box.height != dst_surface_height)) { + return false; + } + + /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the + * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our + * destination surface) to 
determine the stride. This may be wrong + * when reading from texture miplevels > 0, which are stored in + * POT-sized areas. For MSAA, the tile addresses are computed + * explicitly by the RCL, but still use the destination width to + * determine the stride (which could be fixed by explicitly supplying + * it in the ABI). + */ + struct vc5_resource *rsc = vc5_resource(info->src.resource); + + uint32_t stride; + + if (info->src.resource->nr_samples > 1) + stride = align(dst_surface_width, 32) * 4 * rsc->cpp; + /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T) + stride = align(dst_surface_width * rsc->cpp, 128); */ + else + stride = align(dst_surface_width * rsc->cpp, 16); + + if (stride != rsc->slices[info->src.level].stride) + return false; + + if (info->dst.resource->format != info->src.resource->format) + return false; + + if (false) { + fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", + info->src.box.x, + info->src.box.y, + info->dst.box.x, + info->dst.box.y, + info->dst.box.width, + info->dst.box.height); + } + + struct pipe_surface *dst_surf = + vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct pipe_surface *src_surf = + vc5_get_blit_surface(pctx, info->src.resource, info->src.level); + + vc5_flush_jobs_reading_resource(vc5, info->src.resource); + + struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL); + pipe_surface_reference(&job->color_read, src_surf); + + /* If we're resolving from MSAA to single sample, we still need to run + * the engine in MSAA mode for the load. 
+ */ + if (!job->msaa && info->src.resource->nr_samples > 1) { + job->msaa = true; + job->tile_width = 32; + job->tile_height = 32; + } + + job->draw_min_x = info->dst.box.x; + job->draw_min_y = info->dst.box.y; + job->draw_max_x = info->dst.box.x + info->dst.box.width; + job->draw_max_y = info->dst.box.y + info->dst.box.height; + job->draw_width = dst_surf->width; + job->draw_height = dst_surf->height; + + job->tile_width = tile_width; + job->tile_height = tile_height; + job->msaa = msaa; + job->needs_flush = true; + job->resolve |= PIPE_CLEAR_COLOR; + + vc5_job_submit(vc5, job); + + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); + + return true; +} +#endif + +void +vc5_blitter_save(struct vc5_context *vc5) +{ + util_blitter_save_fragment_constant_buffer_slot(vc5->blitter, + vc5->constbuf[PIPE_SHADER_FRAGMENT].cb); + util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb); + util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx); + util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs); + util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); + util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer); + util_blitter_save_viewport(vc5->blitter, &vc5->viewport); + util_blitter_save_scissor(vc5->blitter, &vc5->scissor); + util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs); + util_blitter_save_blend(vc5->blitter, vc5->blend); + util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa); + util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref); + util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask); + util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer); + util_blitter_save_fragment_sampler_states(vc5->blitter, + vc5->fragtex.num_samplers, + (void **)vc5->fragtex.samplers); + util_blitter_save_fragment_sampler_views(vc5->blitter, + vc5->fragtex.num_textures, vc5->fragtex.textures); + 
util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, + vc5->streamout.targets); +} + +static bool +vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + + if (!util_blitter_is_blit_supported(vc5->blitter, info)) { + fprintf(stderr, "blit unsupported %s -> %s\n", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + return false; + } + + vc5_blitter_save(vc5); + util_blitter_blit(vc5->blitter, info); + + return true; +} + +/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 + * or R8 texture (R8 when the resource has a separate stencil buffer). + */ +static void +vc5_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) +{ + struct vc5_context *vc5 = vc5_context(ctx); + struct vc5_resource *src = vc5_resource(info->src.resource); + struct vc5_resource *dst = vc5_resource(info->dst.resource); + enum pipe_format src_format, dst_format; + + if (src->separate_stencil) { + src = src->separate_stencil; + src_format = PIPE_FORMAT_R8_UNORM; + } else { + src_format = PIPE_FORMAT_RGBA8888_UNORM; + } + + if (dst->separate_stencil) { + dst = dst->separate_stencil; + dst_format = PIPE_FORMAT_R8_UNORM; + } else { + dst_format = PIPE_FORMAT_RGBA8888_UNORM; + } + + /* Initialize the surface. */ + struct pipe_surface dst_tmpl = { + .u.tex = { + .level = info->dst.level, + .first_layer = info->dst.box.z, + .last_layer = info->dst.box.z, + }, + .format = dst_format, + }; + struct pipe_surface *dst_surf = + ctx->create_surface(ctx, &dst->base, &dst_tmpl); + + /* Initialize the sampler view. */ + struct pipe_sampler_view src_tmpl = { + .target = src->base.target, + .format = src_format, + .u.tex = { + .first_level = info->src.level, + .last_level = info->src.level, + /* Layer range: all depth slices for 3D, else the whole array. */ + .first_layer = 0, + .last_layer = (src->base.target == PIPE_TEXTURE_3D ?
+ u_minify(src->base.depth0, + info->src.level) - 1 : + src->base.array_size - 1), + }, + .swizzle_r = PIPE_SWIZZLE_X, + .swizzle_g = PIPE_SWIZZLE_Y, + .swizzle_b = PIPE_SWIZZLE_Z, + .swizzle_a = PIPE_SWIZZLE_W, + }; + struct pipe_sampler_view *src_view = + ctx->create_sampler_view(ctx, &src->base, &src_tmpl); + + vc5_blitter_save(vc5); + util_blitter_blit_generic(vc5->blitter, dst_surf, &info->dst.box, + src_view, &info->src.box, + src->base.width0, src->base.height0, + PIPE_MASK_R, + PIPE_TEX_FILTER_NEAREST, + info->scissor_enable ? &info->scissor : NULL, + info->alpha_blend); + + pipe_surface_reference(&dst_surf, NULL); + pipe_sampler_view_reference(&src_view, NULL); +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. + */ +void +vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + struct pipe_blit_info info = *blit_info; + + if (info.mask & PIPE_MASK_S) { + vc5_stencil_blit(pctx, blit_info); + info.mask &= ~PIPE_MASK_S; + } + +#if 0 + if (vc5_tile_blit(pctx, blit_info)) + return; +#endif + + vc5_render_blit(pctx, &info); +} diff --git a/src/gallium/drivers/v3d/v3d_bufmgr.c b/src/gallium/drivers/v3d/v3d_bufmgr.c new file mode 100644 index 00000000000..ef2a5fa07be --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_bufmgr.c @@ -0,0 +1,552 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <errno.h> +#include <err.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/ralloc.h" + +#include "v3d_context.h" +#include "v3d_screen.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +static bool dump_stats = false; + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache); + +/* Prints the screen's BO counters and the BO cache's counters to stderr, + * plus the free times of the first and last time_list entries and the + * current CLOCK_MONOTONIC second when the cache is not empty. + */ +static void +vc5_bo_dump_stats(struct vc5_screen *screen) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + + fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); + fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); + fprintf(stderr, " BOs cached: %d\n", cache->bo_count); + fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); + + if (!list_empty(&cache->time_list)) { + struct vc5_bo *first = LIST_ENTRY(struct vc5_bo, + cache->time_list.next, + time_list); + struct vc5_bo *last = LIST_ENTRY(struct vc5_bo, + cache->time_list.prev, + time_list); + + fprintf(stderr, " oldest cache time: %ld\n", + (long)first->free_time); + fprintf(stderr, " newest cache time: %ld\n", + (long)last->free_time); + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + fprintf(stderr, " now: %ld\n", + (long)time.tv_sec); + } +} + +/* Unlinks the BO from both cache lists and subtracts it from the cache's + * count and size totals. + */ +static void +vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo) +{ + list_del(&bo->time_list); + list_del(&bo->size_list); + cache->bo_count--; + cache->bo_size -= bo->size; +} +
+static struct vc5_bo * +vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = size / 4096 - 1; + + if (cache->size_list_size <= page_index) + return NULL; + + struct vc5_bo *bo = NULL; + mtx_lock(&cache->lock); + if (!list_empty(&cache->size_list[page_index])) { + bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next, + size_list); + + /* Check that the BO has gone idle. If not, then we want to + * allocate something new instead, since we assume that the + * user will proceed to CPU map it and fill it with stuff. + */ + if (!vc5_bo_wait(bo, 0, NULL)) { + mtx_unlock(&cache->lock); + return NULL; + } + + pipe_reference_init(&bo->reference, 1); + vc5_bo_remove_from_cache(cache, bo); + + bo->name = name; + } + mtx_unlock(&cache->lock); + return bo; +} + +/* Returns a BO of `size` bytes rounded up to a multiple of 4096, taken from + * the BO cache when an idle BO of that size is available and otherwise + * created with DRM_IOCTL_V3D_CREATE_BO. Returns NULL on failure. + */ +struct vc5_bo * +vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name) +{ + struct vc5_bo *bo; + int ret; + + size = align(size, 4096); + + bo = vc5_bo_from_cache(screen, size, name); + if (bo) { + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb from cache:\n", + name, size / 1024); + vc5_bo_dump_stats(screen); + } + return bo; + } + + bo = CALLOC_STRUCT(vc5_bo); + if (!bo) + return NULL; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->size = size; + bo->name = name; + bo->private = true; + + /* Initialized once, before the label, so the flag survives the goto and + * the cache is flushed at most one time per allocation. + */ + bool cleared_and_retried = false; + + retry: + ; + struct drm_v3d_create_bo create = { + .size = size + }; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_CREATE_BO, &create); + bo->handle = create.handle; + bo->offset = create.offset; + + if (ret != 0) { + if (!list_empty(&screen->bo_cache.time_list) && + !cleared_and_retried) { + cleared_and_retried = true; + vc5_bo_cache_free_all(&screen->bo_cache); + goto retry; + } + + free(bo); + return NULL; + } + + screen->bo_count++; + screen->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Allocated %s %dkb:\n",
name, size / 1024); + vc5_bo_dump_stats(screen); + } + + return bo; +} + +/* Takes the BO cache lock and passes the BO to + * vc5_bo_last_unreference_locked_timed(), stamped with the current + * CLOCK_MONOTONIC second. + */ +void +vc5_bo_last_unreference(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + mtx_lock(&screen->bo_cache.lock); + vc5_bo_last_unreference_locked_timed(bo, time.tv_sec); + mtx_unlock(&screen->bo_cache.lock); +} + +/* Releases a BO for good: drops the CPU mapping (free() for simulator + * "winsys" BOs, munmap() otherwise), closes the GEM handle, updates the + * screen's BO counters and frees the struct. + */ +static void +vc5_bo_free(struct vc5_bo *bo) +{ + struct vc5_screen *screen = bo->screen; + + if (bo->map) { + if (using_vc5_simulator && bo->name && + strcmp(bo->name, "winsys") == 0) { + free(bo->map); + } else { + munmap(bo->map, bo->size); + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); + } + } + + struct drm_gem_close c; + memset(&c, 0, sizeof(c)); + c.handle = bo->handle; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); + if (ret != 0) + fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); + + screen->bo_count--; + screen->bo_size -= bo->size; + + if (dump_stats) { + fprintf(stderr, "Freed %s%s%dkb:\n", + bo->name ? bo->name : "", + bo->name ? " " : "", + bo->size / 1024); + vc5_bo_dump_stats(screen); + } + + free(bo); +} + +/* Frees cached BOs starting from the head of the cache's time_list, + * stopping at the first one that is not yet stale. + */ +static void +free_stale_bos(struct vc5_screen *screen, time_t time) +{ + struct vc5_bo_cache *cache = &screen->bo_cache; + bool freed_any = false; + + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + if (dump_stats && !freed_any) { + fprintf(stderr, "Freeing stale BOs:\n"); + vc5_bo_dump_stats(screen); + freed_any = true; + } + + /* If it has been in the cache for more than two seconds, free it.
*/ + if (time - bo->free_time > 2) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } else { + break; + } + } + + if (dump_stats && freed_any) { + fprintf(stderr, "Freed stale BOs:\n"); + vc5_bo_dump_stats(screen); + } +} + +static void +vc5_bo_cache_free_all(struct vc5_bo_cache *cache) +{ + mtx_lock(&cache->lock); + list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, + time_list) { + vc5_bo_remove_from_cache(cache, bo); + vc5_bo_free(bo); + } + mtx_unlock(&cache->lock); +} + +void +vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time) +{ + struct vc5_screen *screen = bo->screen; + struct vc5_bo_cache *cache = &screen->bo_cache; + uint32_t page_index = bo->size / 4096 - 1; + + if (!bo->private) { + vc5_bo_free(bo); + return; + } + + if (cache->size_list_size <= page_index) { + struct list_head *new_list = + ralloc_array(screen, struct list_head, page_index + 1); + + /* Move old list contents over (since the array has moved, and + * therefore the pointers to the list heads have to change). 
+ */ + for (int i = 0; i < cache->size_list_size; i++) { + struct list_head *old_head = &cache->size_list[i]; + if (list_empty(old_head)) + list_inithead(&new_list[i]); + else { + new_list[i].next = old_head->next; + new_list[i].prev = old_head->prev; + new_list[i].next->prev = &new_list[i]; + new_list[i].prev->next = &new_list[i]; + } + } + for (int i = cache->size_list_size; i < page_index + 1; i++) + list_inithead(&new_list[i]); + + cache->size_list = new_list; + cache->size_list_size = page_index + 1; + } + + bo->free_time = time; + list_addtail(&bo->size_list, &cache->size_list[page_index]); + list_addtail(&bo->time_list, &cache->time_list); + cache->bo_count++; + cache->bo_size += bo->size; + if (dump_stats) { + fprintf(stderr, "Freed %s %dkb to cache:\n", + bo->name, bo->size / 1024); + vc5_bo_dump_stats(screen); + } + bo->name = NULL; + + free_stale_bos(screen, time); +} + +/* Looks up or creates the vc5_bo wrapping an imported GEM handle. An + * existing entry in screen->bo_handles gets an extra reference; otherwise a + * new non-private "winsys" BO is created, its offset is queried from the + * kernel and it is added to the table. Returns NULL on failure. The + * bo_handles_mutex is held for the whole operation and released on every + * exit path. + */ +static struct vc5_bo * +vc5_bo_open_handle(struct vc5_screen *screen, + uint32_t winsys_stride, + uint32_t handle, uint32_t size) +{ + struct vc5_bo *bo; + + assert(size); + + mtx_lock(&screen->bo_handles_mutex); + + bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); + if (bo) { + pipe_reference(NULL, &bo->reference); + goto done; + } + + bo = CALLOC_STRUCT(vc5_bo); + if (!bo) + goto done; + + pipe_reference_init(&bo->reference, 1); + bo->screen = screen; + bo->handle = handle; + bo->size = size; + bo->name = "winsys"; + bo->private = false; + +#ifdef USE_V3D_SIMULATOR + vc5_simulator_open_from_handle(screen->fd, winsys_stride, + bo->handle, bo->size); + bo->map = malloc(bo->size); +#endif + + struct drm_v3d_get_bo_offset get = { + .handle = handle, + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get); + if (ret) { + fprintf(stderr, "Failed to get BO offset: %s\n", + strerror(errno)); + free(bo->map); + free(bo); + /* Leave through the common exit so bo_handles_mutex is released. */ + bo = NULL; + goto done; + } + bo->offset = get.offset; + assert(bo->offset != 0); + + util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + +done: +
mtx_unlock(&screen->bo_handles_mutex); + return bo; +} + +struct vc5_bo * +vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride) +{ + struct drm_gem_open o = { + .name = name + }; + int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); + if (ret) { + fprintf(stderr, "Failed to open bo %d: %s\n", + name, strerror(errno)); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size); +} + +struct vc5_bo * +vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride) +{ + uint32_t handle; + int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); + int size; + if (ret) { + fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd); + return NULL; + } + + /* Determine the size of the bo we were handed. */ + size = lseek(fd, 0, SEEK_END); + if (size == -1) { + fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd); + return NULL; + } + + return vc5_bo_open_handle(screen, winsys_stride, handle, size); +} + +int +vc5_bo_get_dmabuf(struct vc5_bo *bo) +{ + int fd; + int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle, + O_CLOEXEC, &fd); + if (ret != 0) { + fprintf(stderr, "Failed to export gem bo %d to dmabuf\n", + bo->handle); + return -1; + } + + mtx_lock(&bo->screen->bo_handles_mutex); + bo->private = false; + util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); + mtx_unlock(&bo->screen->bo_handles_mutex); + + return fd; +} + +/* Creates a global GEM name for the BO and stores it in *name. On success + * the BO is marked non-private and true is returned; on failure the BO is + * left untouched and false is returned. + */ +bool +vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) +{ + struct drm_gem_flink flink = { + .handle = bo->handle, + }; + int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) { + fprintf(stderr, "Failed to flink bo %d: %s\n", + bo->handle, strerror(errno)); + /* Don't free(bo) here: the BO is refcounted, and its GEM handle + * and mapping are only released through vc5_bo_free(). + */ + return false; + } + + bo->private = false; + *name = flink.name; + + return true; +} + +static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) +{ + struct drm_v3d_wait_bo wait = { + .handle = handle, + .timeout_ns =
timeout_ns, + }; + int ret = vc5_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait); + if (ret == -1) + return -errno; + else + return 0; + +} + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason) +{ + struct vc5_screen *screen = bo->screen; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { + if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) { + fprintf(stderr, "Blocking on %s BO for %s\n", + bo->name, reason); + } + } + + int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns); + if (ret) { + if (ret != -ETIME) { + fprintf(stderr, "wait failed: %d\n", ret); + abort(); + } + + return false; + } + + return true; +} + +void * +vc5_bo_map_unsynchronized(struct vc5_bo *bo) +{ + uint64_t offset; + int ret; + + if (bo->map) + return bo->map; + + struct drm_v3d_mmap_bo map; + memset(&map, 0, sizeof(map)); + map.handle = bo->handle; + ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_V3D_MMAP_BO, &map); + offset = map.offset; + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->screen->fd, offset); + if (bo->map == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + bo->handle, (long long)offset, bo->size); + abort(); + } + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false)); + + return bo->map; +} + +void * +vc5_bo_map(struct vc5_bo *bo) +{ + void *map = vc5_bo_map_unsynchronized(bo); + + bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map"); + if (!ok) { + fprintf(stderr, "BO wait for map failed\n"); + abort(); + } + + return map; +} + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_bo_cache *cache = &screen->bo_cache; + + vc5_bo_cache_free_all(cache); + + if (dump_stats) { + fprintf(stderr, "BO stats after screen destroy:\n"); + vc5_bo_dump_stats(screen); + } +} diff --git 
a/src/gallium/drivers/v3d/v3d_bufmgr.h b/src/gallium/drivers/v3d/v3d_bufmgr.h new file mode 100644 index 00000000000..4519a206026 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_bufmgr.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_BUFMGR_H +#define VC5_BUFMGR_H + +#include +#include "util/u_hash_table.h" +#include "util/u_inlines.h" +#include "util/list.h" +#include "v3d_screen.h" + +struct vc5_context; + +struct vc5_bo { + struct pipe_reference reference; + struct vc5_screen *screen; + void *map; + const char *name; + uint32_t handle; + uint32_t size; + + /* Address of the BO in our page tables. */ + uint32_t offset; + + /** Entry in the linked list of buffers freed, by age. */ + struct list_head time_list; + /** Entry in the per-page-count linked list of buffers freed (by age). 
*/ + struct list_head size_list; + /** Approximate second when the bo was freed. */ + time_t free_time; + /** + * Whether only our process has a reference to the BO (meaning that + * it's safe to reuse it in the BO cache). + */ + bool private; +}; + +struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, + const char *name); +void vc5_bo_last_unreference(struct vc5_bo *bo); +void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time); +struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, + uint32_t winsys_stride); +struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, + uint32_t winsys_stride); +bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name); +int vc5_bo_get_dmabuf(struct vc5_bo *bo); + +static inline void +vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo) +{ + if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) + vc5_bo_last_unreference(*old_bo); + *old_bo = new_bo; +} + +static inline struct vc5_bo * +vc5_bo_reference(struct vc5_bo *bo) +{ + pipe_reference(NULL, &bo->reference); + return bo; +} + +static inline void +vc5_bo_unreference(struct vc5_bo **bo) +{ + struct vc5_screen *screen; + if (!*bo) + return; + + if ((*bo)->private) { + /* Avoid the mutex for private BOs */ + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference(*bo); + } else { + screen = (*bo)->screen; + mtx_lock(&screen->bo_handles_mutex); + + if (pipe_reference(&(*bo)->reference, NULL)) { + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)(*bo)->handle); + vc5_bo_last_unreference(*bo); + } + + mtx_unlock(&screen->bo_handles_mutex); + } + + *bo = NULL; +} + +static inline void +vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time) +{ + if (!*bo) + return; + + if (pipe_reference(&(*bo)->reference, NULL)) + vc5_bo_last_unreference_locked_timed(*bo, time); + *bo = NULL; +} + +void * +vc5_bo_map(struct vc5_bo *bo); + +void * +vc5_bo_map_unsynchronized(struct 
vc5_bo *bo); + +bool +vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason); + +bool +vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, + const char *reason); + +void +vc5_bufmgr_destroy(struct pipe_screen *pscreen); + +#endif /* VC5_BUFMGR_H */ + diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3d_cl.c new file mode 100644 index 00000000000..2ffb7ea9a2c --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_cl.c @@ -0,0 +1,90 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "v3d_context.h" +/* The branching packets are the same across V3D versions. 
*/ +#define V3D_VERSION 33 +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +void +vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl) +{ + cl->base = NULL; + cl->next = cl->base; + cl->size = 0; + cl->job = job; +} + +uint32_t +vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment) +{ + uint32_t offset = align(cl_offset(cl), alignment); + + if (offset + space <= cl->size) { + cl->next = cl->base + offset; + return offset; + } + + vc5_bo_unreference(&cl->bo); + cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL"); + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; + + return 0; +} + +void +vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space) +{ + if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) + return; + + struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL"); + assert(space <= new_bo->size); + + /* Chain to the new BO from the old one. */ + if (cl->bo) { + cl_emit(cl, BRANCH, branch) { + branch.address = cl_address(new_bo, 0); + } + vc5_bo_unreference(&cl->bo); + } else { + /* Root the first RCL/BCL BO in the job. 
*/ + vc5_job_add_bo(cl->job, new_bo); + } + + cl->bo = new_bo; + cl->base = vc5_bo_map(cl->bo); + cl->size = cl->bo->size; + cl->next = cl->base; +} + +void +vc5_destroy_cl(struct vc5_cl *cl) +{ + vc5_bo_unreference(&cl->bo); +} diff --git a/src/gallium/drivers/v3d/v3d_cl.h b/src/gallium/drivers/v3d/v3d_cl.h new file mode 100644 index 00000000000..7025b5a672b --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_cl.h @@ -0,0 +1,279 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_CL_H +#define VC5_CL_H + +#include + +#include "util/u_math.h" +#include "util/macros.h" + +struct vc5_bo; +struct vc5_job; +struct vc5_cl; + +/** + * Undefined structure, used for typechecking that you're passing the pointers + * to these functions correctly. 
+ */ +struct vc5_cl_out; + +/** A reference to a BO used in the CL packing functions */ +struct vc5_cl_reloc { + struct vc5_bo *bo; + uint32_t offset; +}; + +static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *); + +#define __gen_user_data struct vc5_cl +#define __gen_address_type struct vc5_cl_reloc +#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ + (reloc)->offset) +#define __gen_emit_reloc cl_pack_emit_reloc + +struct vc5_cl { + void *base; + struct vc5_job *job; + struct vc5_cl_out *next; + struct vc5_bo *bo; + uint32_t size; +}; + +void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl); +void vc5_destroy_cl(struct vc5_cl *cl); +void vc5_dump_cl(void *cl, uint32_t size, bool is_render); +uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo); + +struct PACKED unaligned_16 { uint16_t x; }; +struct PACKED unaligned_32 { uint32_t x; }; + +static inline uint32_t cl_offset(struct vc5_cl *cl) +{ + return (char *)cl->next - (char *)cl->base; +} + +static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl) +{ + return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) }; +} + +static inline void +cl_advance(struct vc5_cl_out **cl, uint32_t n) +{ + (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n); +} + +static inline struct vc5_cl_out * +cl_start(struct vc5_cl *cl) +{ + return cl->next; +} + +static inline void +cl_end(struct vc5_cl *cl, struct vc5_cl_out *next) +{ + cl->next = next; + assert(cl_offset(cl) <= cl->size); +} + + +static inline void +put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val) +{ + struct unaligned_32 *p = (void *)ptr; + p->x = val; +} + +static inline void +put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val) +{ + struct unaligned_16 *p = (void *)ptr; + p->x = val; +} + +static inline void +cl_u8(struct vc5_cl_out **cl, uint8_t n) +{ + *(uint8_t *)(*cl) = n; + cl_advance(cl, 1); +} + +static inline void +cl_u16(struct vc5_cl_out **cl, uint16_t n) +{ 
+ put_unaligned_16(*cl, n); + cl_advance(cl, 2); +} + +static inline void +cl_u32(struct vc5_cl_out **cl, uint32_t n) +{ + put_unaligned_32(*cl, n); + cl_advance(cl, 4); +} + +static inline void +cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n) +{ + *(uint32_t *)(*cl) = n; + cl_advance(cl, 4); +} + +static inline void +cl_aligned_reloc(struct vc5_cl *cl, + struct vc5_cl_out **cl_out, + struct vc5_bo *bo, uint32_t offset) +{ + cl_aligned_u32(cl_out, bo->offset + offset); + vc5_job_add_bo(cl->job, bo); +} + +static inline void +cl_ptr(struct vc5_cl_out **cl, void *ptr) +{ + *(struct vc5_cl_out **)(*cl) = ptr; + cl_advance(cl, sizeof(void *)); +} + +static inline void +cl_f(struct vc5_cl_out **cl, float f) +{ + cl_u32(cl, fui(f)); +} + +static inline void +cl_aligned_f(struct vc5_cl_out **cl, float f) +{ + cl_aligned_u32(cl, fui(f)); +} + +/** + * Reference to a BO with its associated offset, used in the pack process. + */ +static inline struct vc5_cl_reloc +cl_address(struct vc5_bo *bo, uint32_t offset) +{ + struct vc5_cl_reloc reloc = { + .bo = bo, + .offset = offset, + }; + return reloc; +} + +uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align); +void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size); + +#define cl_packet_header(packet) V3DX(packet ## _header) +#define cl_packet_length(packet) V3DX(packet ## _length) +#define cl_packet_pack(packet) V3DX(packet ## _pack) +#define cl_packet_struct(packet) V3DX(packet) + +static inline void * +cl_get_emit_space(struct vc5_cl_out **cl, size_t size) +{ + void *addr = *cl; + cl_advance(cl, size); + return addr; +} + +/* Macro for setting up an emit of a CL struct. 
A temporary unpacked struct + * is created, which you get to set fields in of the form: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) { + * .flags.flat_shade_flags = 1 << 2, + * } + * + * or default values only can be emitted with just: + * + * cl_emit(bcl, FLAT_SHADE_FLAGS, flags); + * + * The trick here is that we make a for loop that will execute the body + * (either the block or the ';' after the macro invocation) exactly once. + */ +#define cl_emit(cl, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct vc5_cl_out *cl_out = cl_start(cl); \ + cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + +#define cl_emit_with_prepacked(cl, packet, prepacked, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct vc5_cl_out *cl_out = cl_start(cl); \ + uint8_t packed[cl_packet_length(packet)]; \ + cl_packet_pack(packet)(cl, packed, &name); \ + for (int _i = 0; _i < cl_packet_length(packet); _i++) \ + ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + +#define cl_emit_prepacked(cl, packet) do { \ + memcpy((cl)->next, packet, sizeof(*packet)); \ + cl_advance(&(cl)->next, sizeof(*packet)); \ +} while (0) + +#define v3dx_pack(packed, packet, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \ + cl_packet_length(packet))); \ + 
_loop_terminate = NULL; \ + })) \ + +/** + * Helper function called by the XML-generated pack functions for filling in + * an address field in shader records. + * + * Since we have a private address space as of VC5, our BOs can have lifelong + * offsets, and all the kernel needs to know is which BOs need to be paged in + * for this exec. + */ +static inline void +cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc) +{ + if (reloc->bo) + vc5_job_add_bo(cl->job, reloc->bo); +} + +#endif /* VC5_CL_H */ diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c new file mode 100644 index 00000000000..cb37eba3841 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_context.c @@ -0,0 +1,183 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "pipe/p_defines.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_blitter.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" +#include "pipe/p_screen.h" + +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" + +void +vc5_flush(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +static void +vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (fence) { + struct pipe_screen *screen = pctx->screen; + struct vc5_fence *f = vc5_fence_create(vc5); + screen->fence_reference(screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } +} + +static void +vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + + rsc->initialized_buffers = 0; + + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (!entry) + return; + + struct vc5_job *job = entry->data; + if (job->key.zsbuf && job->key.zsbuf->texture == prsc) + job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); +} + +static void +vc5_context_destroy(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5_flush(pctx); + + if (vc5->blitter) + util_blitter_destroy(vc5->blitter); + + if (vc5->primconvert) + util_primconvert_destroy(vc5->primconvert); + + if (vc5->uploader) + u_upload_destroy(vc5->uploader); + + slab_destroy_child(&vc5->transfer_pool); + + pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL); + pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL); + + 
vc5_program_fini(pctx); + + ralloc_free(vc5); +} + +struct pipe_context * +vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_context *vc5; + + vc5 = rzalloc(NULL, struct vc5_context); + if (!vc5) + return NULL; + struct pipe_context *pctx = &vc5->base; + + vc5->screen = screen; + + int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc5->out_sync); + if (ret) { + ralloc_free(vc5); + return NULL; + } + + /* Prevent dumping of the shaders built during context setup. The + * flag is cleared only after the early returns above, so every + * later exit (including "fail") restores it. + */ + uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB; + V3D_DEBUG &= ~V3D_DEBUG_SHADERDB; + + pctx->screen = pscreen; + pctx->priv = priv; + pctx->destroy = vc5_context_destroy; + pctx->flush = vc5_pipe_flush; + pctx->invalidate_resource = vc5_invalidate_resource; + + if (screen->devinfo.ver >= 41) { + v3d41_draw_init(pctx); + v3d41_state_init(pctx); + } else { + v3d33_draw_init(pctx); + v3d33_state_init(pctx); + } + vc5_program_init(pctx); + vc5_query_init(pctx); + vc5_resource_context_init(pctx); + + vc5_job_init(vc5); + + vc5->fd = screen->fd; + + slab_create_child(&vc5->transfer_pool, &screen->transfer_pool); + + vc5->uploader = u_upload_create_default(&vc5->base); + vc5->base.stream_uploader = vc5->uploader; + vc5->base.const_uploader = vc5->uploader; + + vc5->blitter = util_blitter_create(pctx); + if (!vc5->blitter) + goto fail; + + vc5->primconvert = util_primconvert_create(pctx, + (1 << PIPE_PRIM_QUADS) - 1); + if (!vc5->primconvert) + goto fail; + + V3D_DEBUG |= saved_shaderdb_flag; + + vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + vc5->active_queries = true; + + return &vc5->base; + +fail: + pctx->destroy(pctx); + V3D_DEBUG |= saved_shaderdb_flag; + return NULL; +} diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h new file mode 100644 index 00000000000..7c17eccd47e --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -0,0 +1,565 @@ +/* + * Copyright © 
2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC5_CONTEXT_H +#define VC5_CONTEXT_H + +#ifdef V3D_VERSION +#include "broadcom/common/v3d_macros.h" +#endif + +#include + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/bitset.h" +#include "util/slab.h" +#include "xf86drm.h" +#include "v3d_drm.h" +#include "v3d_screen.h" + +struct vc5_job; +struct vc5_bo; +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); + +#include "v3d_bufmgr.h" +#include "v3d_resource.h" +#include "v3d_cl.h" + +#ifdef USE_V3D_SIMULATOR +#define using_vc5_simulator true +#else +#define using_vc5_simulator false +#endif + +#define VC5_DIRTY_BLEND (1 << 0) +#define VC5_DIRTY_RASTERIZER (1 << 1) +#define VC5_DIRTY_ZSA (1 << 2) +#define VC5_DIRTY_FRAGTEX (1 << 3) +#define VC5_DIRTY_VERTTEX (1 << 4) + +#define VC5_DIRTY_BLEND_COLOR (1 << 7) +#define VC5_DIRTY_STENCIL_REF (1 << 8) +#define VC5_DIRTY_SAMPLE_MASK (1 << 9) +#define VC5_DIRTY_FRAMEBUFFER (1 << 10) +#define VC5_DIRTY_STIPPLE (1 << 11) +#define VC5_DIRTY_VIEWPORT (1 << 12) +#define VC5_DIRTY_CONSTBUF (1 << 13) +#define VC5_DIRTY_VTXSTATE (1 << 14) +#define VC5_DIRTY_VTXBUF (1 << 15) +#define VC5_DIRTY_SCISSOR (1 << 17) +#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18) +#define VC5_DIRTY_PRIM_MODE (1 << 19) +#define VC5_DIRTY_CLIP (1 << 20) +#define VC5_DIRTY_UNCOMPILED_VS (1 << 21) +#define VC5_DIRTY_UNCOMPILED_FS (1 << 22) +#define VC5_DIRTY_COMPILED_CS (1 << 23) +#define VC5_DIRTY_COMPILED_VS (1 << 24) +#define VC5_DIRTY_COMPILED_FS (1 << 25) +#define VC5_DIRTY_FS_INPUTS (1 << 26) +#define VC5_DIRTY_STREAMOUT (1 << 27) +#define VC5_DIRTY_OQ (1 << 28) +#define VC5_DIRTY_CENTROID_FLAGS (1 << 29) + +#define VC5_MAX_FS_INPUTS 64 + +struct vc5_sampler_view { + struct pipe_sampler_view base; + uint32_t p0; + uint32_t p1; + /* Precomputed swizzles to pass in to the shader key. */ + uint8_t swizzle[4]; + + uint8_t texture_shader_state[32]; + /* V3D 4.x: Texture state struct. 
*/ + struct vc5_bo *bo; +}; + +struct vc5_sampler_state { + struct pipe_sampler_state base; + uint32_t p0; + uint32_t p1; + + /* V3D 3.x: Packed texture state. */ + uint8_t texture_shader_state[32]; + /* V3D 4.x: Sampler state struct. */ + struct vc5_bo *bo; +}; + +struct vc5_texture_stateobj { + struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + unsigned num_samplers; + struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; +}; + +struct vc5_shader_uniform_info { + enum quniform_contents *contents; + uint32_t *data; + uint32_t count; +}; + +struct vc5_uncompiled_shader { + /** A name for this program, so you can track it in shader-db output. */ + uint32_t program_id; + /** How many variants of this program were compiled, for shader-db. */ + uint32_t compiled_variant_count; + struct pipe_shader_state base; + uint32_t num_tf_outputs; + struct v3d_varying_slot *tf_outputs; + uint16_t tf_specs[16]; + uint16_t tf_specs_psiz[16]; + uint32_t num_tf_specs; + + /** + * Flag for if the NIR in this shader originally came from TGSI. If + * so, we need to do some fixups at compile time, due to missing + * information in TGSI that exists in NIR. + */ + bool was_tgsi; +}; + +struct vc5_compiled_shader { + struct vc5_bo *bo; + + union { + struct v3d_prog_data *base; + struct v3d_vs_prog_data *vs; + struct v3d_fs_prog_data *fs; + } prog_data; + + /** + * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the + * uniforms have to be rewritten (and therefore the shader state + * reemitted). 
+ */ + uint32_t uniform_dirty_bits; +}; + +struct vc5_program_stateobj { + struct vc5_uncompiled_shader *bind_vs, *bind_fs; + struct vc5_compiled_shader *cs, *vs, *fs; + + struct vc5_bo *spill_bo; + int spill_size_per_thread; +}; + +struct vc5_constbuf_stateobj { + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertexbuf_stateobj { + struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; + unsigned count; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + +struct vc5_vertex_stateobj { + struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; + unsigned num_elements; + + uint8_t attrs[12 * VC5_MAX_ATTRIBUTES]; + struct vc5_bo *default_attribute_values; +}; + +struct vc5_streamout_stateobj { + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; +}; + +/* Hash table key for vc5->jobs */ +struct vc5_job_key { + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; +}; + +enum vc5_ez_state { + VC5_EZ_UNDECIDED = 0, + VC5_EZ_GT_GE, + VC5_EZ_LT_LE, + VC5_EZ_DISABLED, +}; + +/** + * A complete bin/render job. + * + * This is all of the state necessary to submit a bin/render to the kernel. + * We want to be able to have multiple in progress at a time, so that we don't + * need to flush an existing CL just to switch to rendering to a new render + * target (which would mean reading back from the old render target when + * starting to render to it again). + */ +struct vc5_job { + struct vc5_context *vc5; + struct vc5_cl bcl; + struct vc5_cl rcl; + struct vc5_cl indirect; + struct vc5_bo *tile_alloc; + struct vc5_bo *tile_state; + uint32_t shader_rec_count; + + struct drm_v3d_submit_cl submit; + + /** + * Set of all BOs referenced by the job. This will be used for making + * the list of BOs that the kernel will need to have paged in to + * execute our job. + */ + struct set *bos; + + /** Sum of the sizes of the BOs referenced by the job. 
*/ + uint32_t referenced_size; + + struct set *write_prscs; + + /* Size of the submit.bo_handles array. */ + uint32_t bo_handles_size; + + /** @{ Surfaces to submit rendering for. */ + struct pipe_surface *cbufs[4]; + struct pipe_surface *zsbuf; + /** @} */ + /** @{ + * Bounding box of the scissor across all queued drawing. + * + * Note that the max values are exclusive. + */ + uint32_t draw_min_x; + uint32_t draw_min_y; + uint32_t draw_max_x; + uint32_t draw_max_y; + /** @} */ + /** @{ + * Width/height of the color framebuffer being rendered to, + * for VC5_TILE_RENDERING_MODE_CONFIG. + */ + uint32_t draw_width; + uint32_t draw_height; + /** @} */ + /** @{ Tile information, depending on MSAA and float color buffer. */ + uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */ + uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */ + + uint32_t tile_width; /** @< Width of a tile. */ + uint32_t tile_height; /** @< Height of a tile. */ + /** maximum internal_bpp of all color render targets. */ + uint32_t internal_bpp; + + /** Whether the current rendering is in a 4X MSAA tile buffer. */ + bool msaa; + /** @} */ + + /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the + * first rendering. + */ + uint32_t cleared; + /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to + * (either clears or draws). + */ + uint32_t resolve; + uint32_t clear_color[4][4]; + float clear_z; + uint8_t clear_s; + + /** + * Set if some drawing (triangles, blits, or just a glClear()) has + * been done to the FBO, meaning that we need to + * DRM_IOCTL_VC5_SUBMIT_CL. + */ + bool needs_flush; + + /** + * Set if there is a nonzero address for OCCLUSION_QUERY_COUNTER. If + * so, we need to disable it and flush before ending the CL, to keep + * the next tile from starting with it enabled. + */ + bool oq_enabled; + + /** + * Set when a packet enabling TF on all further primitives has been + * emitted. 
+ */ + bool tf_enabled; + + /** + * Current EZ state for drawing. Updated at the start of draw after + * we've decided on the shader being rendered. + */ + enum vc5_ez_state ez_state; + /** + * The first EZ state that was used for drawing with a decided EZ + * direction (so either UNDECIDED, GT, or LT). + */ + enum vc5_ez_state first_ez_state; + + /** + * Number of draw calls (not counting full buffer clears) queued in + * the current job. + */ + uint32_t draw_calls_queued; + + struct vc5_job_key key; +}; + +struct vc5_context { + struct pipe_context base; + + int fd; + struct vc5_screen *screen; + + /** The 3D rendering job for the currently bound FBO. */ + struct vc5_job *job; + + /* Map from struct vc5_job_key to the job for that FBO. + */ + struct hash_table *jobs; + + /** + * Map from vc5_resource to a job writing to that resource. + * + * Primarily for flushing jobs rendering to textures that are now + * being read from. + */ + struct hash_table *write_jobs; + + struct slab_child_pool transfer_pool; + struct blitter_context *blitter; + + /** bitfield of VC5_DIRTY_* */ + uint32_t dirty; + + struct primconvert_context *primconvert; + + struct hash_table *fs_cache, *vs_cache; + uint32_t next_uncompiled_program_id; + uint64_t next_compiled_program_id; + + struct vc5_compiler_state *compiler_state; + + uint8_t prim_mode; + + /** Maximum index buffer valid for the current shader_rec. */ + uint32_t max_index; + + /** Sync object that our RCL will update as its out_sync. 
*/ + uint32_t out_sync; + + struct u_upload_mgr *uploader; + + /** @{ Current pipeline state objects */ + struct pipe_scissor_state scissor; + struct pipe_blend_state *blend; + struct vc5_rasterizer_state *rasterizer; + struct vc5_depth_stencil_alpha_state *zsa; + + struct vc5_texture_stateobj verttex, fragtex; + + struct vc5_program_stateobj prog; + + struct vc5_vertex_stateobj *vtx; + + struct { + struct pipe_blend_color f; + uint16_t hf[4]; + } blend_color; + struct pipe_stencil_ref stencil_ref; + unsigned sample_mask; + struct pipe_framebuffer_state framebuffer; + + /* Per render target, whether we should swap the R and B fields in the + * shader's color output and in blending. If render targets disagree + * on the R/B swap and use the constant color, then we would need to + * fall back to in-shader blending. + */ + uint8_t swap_color_rb; + + /* Per render target, whether we should treat the dst alpha values as + * one in blending. + * + * For RGBX formats, the tile buffer's alpha channel will be + * undefined. 
+ */ + uint8_t blend_dst_alpha_one; + + bool active_queries; + + uint32_t tf_prims_generated; + uint32_t prims_generated; + + struct pipe_poly_stipple stipple; + struct pipe_clip_state clip; + struct pipe_viewport_state viewport; + struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct vc5_vertexbuf_stateobj vertexbuf; + struct vc5_streamout_stateobj streamout; + struct vc5_bo *current_oq; + /** @} */ +}; + +struct vc5_rasterizer_state { + struct pipe_rasterizer_state base; + + /* VC5_CONFIGURATION_BITS */ + uint8_t config_bits[3]; + + float point_size; + + /** + * Half-float (1/8/7 bits) value of polygon offset units for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_units; + /** + * Half-float (1/8/7 bits) value of polygon offset scale for + * VC5_PACKET_DEPTH_OFFSET + */ + uint16_t offset_factor; +}; + +struct vc5_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + enum vc5_ez_state ez_state; + + /** Uniforms for stencil state. + * + * Index 0 is either the front config, or the front-and-back config. + * Index 1 is the back config if doing separate back stencil. + * Index 2 is the writemask config if it's not a common mask value. + */ + uint32_t stencil_uniforms[3]; + + uint8_t stencil_front[6]; + uint8_t stencil_back[6]; +}; + +#define perf_debug(...) 
do { \ + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +static inline struct vc5_context * +vc5_context(struct pipe_context *pcontext) +{ + return (struct vc5_context *)pcontext; +} + +static inline struct vc5_sampler_view * +vc5_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct vc5_sampler_view *)psview; +} + +static inline struct vc5_sampler_state * +vc5_sampler_state(struct pipe_sampler_state *psampler) +{ + return (struct vc5_sampler_state *)psampler; +} + +struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags); +void vc5_program_init(struct pipe_context *pctx); +void vc5_program_fini(struct pipe_context *pctx); +void vc5_query_init(struct pipe_context *pctx); + +void vc5_simulator_init(struct vc5_screen *screen); +void vc5_simulator_destroy(struct vc5_screen *screen); +int vc5_simulator_flush(struct vc5_context *vc5, + struct drm_v3d_submit_cl *args, + struct vc5_job *job); +int vc5_simulator_ioctl(int fd, unsigned long request, void *arg); +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size); + +static inline int +vc5_ioctl(int fd, unsigned long request, void *arg) +{ + if (using_vc5_simulator) + return vc5_simulator_ioctl(fd, request, arg); + else + return drmIoctl(fd, request, arg); +} + +void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader); +struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate); + +void vc5_flush(struct pipe_context *pctx); +void vc5_job_init(struct vc5_context *vc5); +struct vc5_job *vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, + struct pipe_surface *zsbuf); +struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5); +void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); +void vc5_job_add_write_resource(struct 
vc5_job *job, struct pipe_resource *prsc); +void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job); +void vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc); +void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode); + +bool vc5_rt_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f); +bool vc5_tex_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f); +uint8_t vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f); +uint8_t vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f); +uint8_t vc5_get_tex_return_size(const struct v3d_device_info *devinfo, + enum pipe_format f, + enum pipe_tex_compare compare); +uint8_t vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, + enum pipe_format f); +const uint8_t *vc5_get_format_swizzle(const struct v3d_device_info *devinfo, + enum pipe_format f); +void vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, + uint32_t format, + uint32_t *type, + uint32_t *bpp); + +void vc5_init_query_functions(struct vc5_context *vc5); +void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); +void vc5_blitter_save(struct vc5_context *vc5); + +struct vc5_fence *vc5_fence_create(struct vc5_context *vc5); + +#ifdef v3dX +# include "v3dx_context.h" +#else +# define v3dX(x) v3d33_##x +# include "v3dx_context.h" +# undef v3dX + +# define v3dX(x) v3d41_##x +# include "v3dx_context.h" +# undef v3dX +#endif + +#endif /* VC5_CONTEXT_H */ diff --git a/src/gallium/drivers/v3d/v3d_fence.c b/src/gallium/drivers/v3d/v3d_fence.c new file mode 100644 index 00000000000..54bce562403 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_fence.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, 
to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_fence.c + * + * Seqno-based fence management. + * + * We have two mechanisms for waiting in our kernel API: You can wait on a BO + * for all rendering to it from any process to be completed, or wait on a + * seqno for that particular seqno to be passed. The fence API we're + * implementing is based on waiting for all rendering in the context to have + * completed (with no reference to what other processes might be doing with + * the same BOs), so we can just use the seqno of the last rendering we'd + * fired off as our fence marker.
+ */ + +#include "util/u_inlines.h" + +#include "v3d_context.h" +#include "v3d_bufmgr.h" + +struct vc5_fence { + struct pipe_reference reference; + uint32_t sync; +}; + +static void +vc5_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **pp, + struct pipe_fence_handle *pf) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence **p = (struct vc5_fence **)pp; + struct vc5_fence *f = (struct vc5_fence *)pf; + struct vc5_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + drmSyncobjDestroy(screen->fd, old->sync); + free(old); + } + *p = f; +} + +/** + * Waits on the fence's sync object and reports whether it signaled. + * + * drmSyncobjWait() returns 0 on success and a negative errno on failure or + * timeout, so the result is compared against 0 to produce the boolean + * this hook returns instead of being returned directly (which inverted it). + * + * NOTE(review): drmSyncobjWait() looks like it expects an absolute deadline + * while timeout_ns here is presumably a relative timeout -- confirm. + */ +static boolean +vc5_fence_finish(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_fence_handle *pf, + uint64_t timeout_ns) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_fence *f = (struct vc5_fence *)pf; + + return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL) == 0; +} + +struct vc5_fence * +vc5_fence_create(struct vc5_context *vc5) +{ + struct vc5_fence *f = calloc(1, sizeof(*f)); + if (!f) + return NULL; + + uint32_t new_sync; + /* Make a new sync object for the context.
*/ + int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &new_sync); + if (ret) { + free(f); + return NULL; + } + + pipe_reference_init(&f->reference, 1); + f->sync = vc5->out_sync; + vc5->out_sync = new_sync; + + return f; +} + +void +vc5_fence_init(struct vc5_screen *screen) +{ + screen->base.fence_reference = vc5_fence_reference; + screen->base.fence_finish = vc5_fence_finish; +} diff --git a/src/gallium/drivers/v3d/v3d_format_table.h b/src/gallium/drivers/v3d/v3d_format_table.h new file mode 100644 index 00000000000..8b8011351a1 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_format_table.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define V3D_OUTPUT_IMAGE_FORMAT_NO 255 + +#include <stdbool.h> +#include <stdint.h> + +struct vc5_format { + /** Set if the pipe format is defined in the table.
*/ + bool present; + + /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + uint8_t rt_type; + + /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + uint8_t tex_type; + + /** + * Swizzle to apply to the RGBA shader output for storing to the tile + * buffer, to the RGBA tile buffer to produce shader input (for + * blending), and for turning the rgba8888 texture sampler return + * value into shader rgba values. + */ + uint8_t swizzle[4]; + + /* Whether the return value is 16F/I/UI or 32F/I/UI. */ + uint8_t return_size; + + /* If return_size == 32, how many channels are returned by texturing. + * 16 always returns 2 pairs of 16 bit values. + */ + uint8_t return_channels; +}; diff --git a/src/gallium/drivers/v3d/v3d_formats.c b/src/gallium/drivers/v3d/v3d_formats.c new file mode 100644 index 00000000000..8424b368cf4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_formats.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc5_formats.c + * + * Contains the table and accessors for VC5 texture and render target format + * support. + * + * The hardware has limited support for texture formats, and extremely limited + * support for render target formats. As a result, we emulate other formats + * in our shader code, and this stores the table for doing so. + */ + +#include "util/macros.h" + +#include "v3d_context.h" +#include "v3d_format_table.h" + +static const struct vc5_format * +get_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + if (devinfo->ver >= 41) + return v3d41_get_format_desc(f); + else + return v3d33_get_format_desc(f); +} + +bool +vc5_rt_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return false; + + return vf->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO; +} + +uint8_t +vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->rt_type; +} + +bool +vc5_tex_format_supported(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + return vf != NULL; +} + +uint8_t +vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->tex_type; +} + +uint8_t +vc5_get_tex_return_size(const struct v3d_device_info *devinfo, + enum pipe_format f, enum pipe_tex_compare compare) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + if (compare == 
PIPE_TEX_COMPARE_R_TO_TEXTURE) + return 16; + + return vf->return_size; +} + +uint8_t +vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, + enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + + if (!vf) + return 0; + + return vf->return_channels; +} + +const uint8_t * +vc5_get_format_swizzle(const struct v3d_device_info *devinfo, enum pipe_format f) +{ + const struct vc5_format *vf = get_format(devinfo, f); + static const uint8_t fallback[] = {0, 1, 2, 3}; + + if (!vf) + return fallback; + + return vf->swizzle; +} + +/** + * Dispatches to the V3D 4.1 or V3D 3.3 variant of this lookup based on + * devinfo->ver, forwarding format, type and bpp unchanged. + * + * This function returns void, so the per-version helper is called as a + * plain statement: ISO C does not allow "return <expr>;" in a void function. + */ +void +vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, + uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + if (devinfo->ver >= 41) { + v3d41_get_internal_type_bpp_for_output_format(format, + type, bpp); + } else { + v3d33_get_internal_type_bpp_for_output_format(format, + type, bpp); + } +} diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c new file mode 100644 index 00000000000..85c64df34ca --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -0,0 +1,452 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc5_job.c + * + * Functions for submitting VC5 render jobs to the kernel. + */ + +#include <xf86drm.h> +#include "v3d_context.h" +/* The OQ/semaphore packets are the same across V3D versions. */ +#define V3D_VERSION 33 +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/common/v3d_macros.h" +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "broadcom/clif/clif_dump.h" + +static void +remove_from_ht(struct hash_table *ht, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + _mesa_hash_table_remove(ht, entry); +} + +static void +vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) +{ + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + vc5_bo_unreference(&bo); + } + + remove_from_ht(vc5->jobs, &job->key); + + if (job->write_prscs) { + struct set_entry *entry; + + set_foreach(job->write_prscs, entry) { + const struct pipe_resource *prsc = entry->key; + + remove_from_ht(vc5->write_jobs, (void *)prsc); + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], NULL); + } + } + if (job->zsbuf) { + remove_from_ht(vc5->write_jobs, job->zsbuf->texture); + pipe_surface_reference(&job->zsbuf, NULL); + } + + if (vc5->job == job) + vc5->job = NULL; + + vc5_destroy_cl(&job->bcl); + vc5_destroy_cl(&job->rcl); +
vc5_destroy_cl(&job->indirect); + vc5_bo_unreference(&job->tile_alloc); + vc5_bo_unreference(&job->tile_state); + + ralloc_free(job); +} + +static struct vc5_job * +vc5_job_create(struct vc5_context *vc5) +{ + struct vc5_job *job = rzalloc(vc5, struct vc5_job); + + job->vc5 = vc5; + + vc5_init_cl(job, &job->bcl); + vc5_init_cl(job, &job->rcl); + vc5_init_cl(job, &job->indirect); + + job->draw_min_x = ~0; + job->draw_min_y = ~0; + job->draw_max_x = 0; + job->draw_max_y = 0; + + job->bos = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + return job; +} + +void +vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo) +{ + if (!bo) + return; + + if (_mesa_set_search(job->bos, bo)) + return; + + vc5_bo_reference(bo); + _mesa_set_add(job->bos, bo); + job->referenced_size += bo->size; + + uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; + + if (job->submit.bo_handle_count >= job->bo_handles_size) { + job->bo_handles_size = MAX2(4, job->bo_handles_size * 2); + bo_handles = reralloc(job, bo_handles, + uint32_t, job->bo_handles_size); + job->submit.bo_handles = (uintptr_t)(void *)bo_handles; + } + bo_handles[job->submit.bo_handle_count++] = bo->handle; +} + +void +vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc) +{ + struct vc5_context *vc5 = job->vc5; + + if (!job->write_prscs) { + job->write_prscs = _mesa_set_create(job, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + + _mesa_set_add(job->write_prscs, prsc); + _mesa_hash_table_insert(vc5->write_jobs, prsc, job); +} + +void +vc5_flush_jobs_writing_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, + prsc); + if (entry) { + struct vc5_job *job = entry->data; + vc5_job_submit(vc5, job); + } +} + +void +vc5_flush_jobs_reading_resource(struct vc5_context *vc5, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + 
vc5_flush_jobs_writing_resource(vc5, prsc); + + struct hash_entry *entry; + hash_table_foreach(vc5->jobs, entry) { + struct vc5_job *job = entry->data; + + if (_mesa_set_search(job->bos, rsc->bo)) { + vc5_job_submit(vc5, job); + /* Reminder: vc5->jobs is safe to keep iterating even + * after deletion of an entry. + */ + continue; + } + } +} + +/** + * Picks the job's tile width and height from its MSAA flag, the color + * buffers that are bound, and the largest internal bpp among them, and + * records that bpp in job->internal_bpp. + */ +static void +vc5_job_set_tile_buffer_size(struct vc5_job *job) +{ + static const uint8_t tile_sizes[] = { + 64, 64, + 64, 32, + 32, 32, + 32, 16, + 16, 16, + }; + int tile_size_index = 0; + if (job->msaa) + tile_size_index += 2; + + if (job->cbufs[3] || job->cbufs[2]) + tile_size_index += 2; + else if (job->cbufs[1]) + tile_size_index++; + + int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) { + struct vc5_surface *surf = vc5_surface(job->cbufs[i]); + max_bpp = MAX2(max_bpp, surf->internal_bpp); + } + } + job->internal_bpp = max_bpp; + STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); + tile_size_index += max_bpp; + + /* tile_sizes holds (width, height) pairs and is read at index * 2 + * and index * 2 + 1, so the index is bounded by the pair count. + */ + assert(tile_size_index < ARRAY_SIZE(tile_sizes) / 2); + job->tile_width = tile_sizes[tile_size_index * 2 + 0]; + job->tile_height = tile_sizes[tile_size_index * 2 + 1]; +} + +/** + * Returns a vc5_job structure for tracking V3D rendering to a particular FBO. + * + * If we've already started rendering to this FBO, then return that same job, + * otherwise make a new one. If we're beginning rendering to an FBO, make + * sure that any previous reads of the FBO (or writes to its color/Z surfaces) + * have been flushed. + */ +struct vc5_job * +vc5_get_job(struct vc5_context *vc5, + struct pipe_surface **cbufs, struct pipe_surface *zsbuf) +{ + /* Return the existing job for this FBO if we have one */ + struct vc5_job_key local_key = { + .cbufs = { + cbufs[0], + cbufs[1], + cbufs[2], + cbufs[3], + }, + .zsbuf = zsbuf, + }; + struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs, + &local_key); + if (entry) + return entry->data; + + /* Creating a new job.
Make sure that any previous jobs reading or + * writing these buffers are flushed. + */ + struct vc5_job *job = vc5_job_create(vc5); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) { + vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture); + pipe_surface_reference(&job->cbufs[i], cbufs[i]); + + if (cbufs[i]->texture->nr_samples > 1) + job->msaa = true; + } + } + if (zsbuf) { + vc5_flush_jobs_reading_resource(vc5, zsbuf->texture); + pipe_surface_reference(&job->zsbuf, zsbuf); + if (zsbuf->texture->nr_samples > 1) + job->msaa = true; + } + + vc5_job_set_tile_buffer_size(job); + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (cbufs[i]) + _mesa_hash_table_insert(vc5->write_jobs, + cbufs[i]->texture, job); + } + if (zsbuf) + _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job); + + memcpy(&job->key, &local_key, sizeof(local_key)); + _mesa_hash_table_insert(vc5->jobs, &job->key, job); + + return job; +} + +struct vc5_job * +vc5_get_job_for_fbo(struct vc5_context *vc5) +{ + if (vc5->job) + return vc5->job; + + struct pipe_surface **cbufs = vc5->framebuffer.cbufs; + struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf; + struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf); + + /* The dirty flags are tracking what's been updated while vc5->job has + * been bound, so set them all to ~0 when switching between jobs. We + * also need to reset all state at the start of rendering. + */ + vc5->dirty = ~0; + + /* If we're binding to uninitialized buffers, no need to load their + * contents before drawing. 
+ */ + for (int i = 0; i < 4; i++) { + if (cbufs[i]) { + struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_COLOR0 << i; + } + } + + if (zsbuf) { + struct vc5_resource *rsc = vc5_resource(zsbuf->texture); + if (!rsc->writes) + job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; + } + + job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width, + job->tile_width); + job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height, + job->tile_height); + + vc5->job = job; + + return job; +} + +static bool +vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr) +{ + struct vc5_job *job = data; + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (void *)entry->key; + + if (addr >= bo->offset && + addr < bo->offset + bo->size) { + vc5_bo_map(bo); + *vaddr = bo->map + addr - bo->offset; + return true; + } + } + + return false; +} + +static void +vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job) +{ + if (!(V3D_DEBUG & V3D_DEBUG_CL)) + return; + + struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo, + stderr, vc5_clif_dump_lookup, + job); + + fprintf(stderr, "BCL: 0x%08x..0x%08x\n", + job->submit.bcl_start, job->submit.bcl_end); + + clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end); + + fprintf(stderr, "RCL: 0x%08x..0x%08x\n", + job->submit.rcl_start, job->submit.rcl_end); + clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end); +} + +/** + * Submits the job to the kernel and then frees it.
+ */ +void +vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) +{ + MAYBE_UNUSED struct vc5_screen *screen = vc5->screen; + + if (!job->needs_flush) + goto done; + + if (vc5->screen->devinfo.ver >= 41) + v3d41_emit_rcl(job); + else + v3d33_emit_rcl(job); + + if (cl_offset(&job->bcl) > 0) { + if (screen->devinfo.ver >= 41) + v3d41_bcl_epilogue(vc5, job); + else + v3d33_bcl_epilogue(vc5, job); + } + + job->submit.out_sync = vc5->out_sync; + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); + job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); + + /* On V3D 4.1, the tile alloc/state setup moved to register writes + * instead of binner packets. + */ + if (screen->devinfo.ver >= 41) { + vc5_job_add_bo(job, job->tile_alloc); + job->submit.qma = job->tile_alloc->offset; + job->submit.qms = job->tile_alloc->size; + + vc5_job_add_bo(job, job->tile_state); + job->submit.qts = job->tile_state->offset; + } + + vc5_clif_dump(vc5, job); + + if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { + int ret; + +#ifndef USE_V3D_SIMULATOR + ret = drmIoctl(vc5->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); +#else + ret = vc5_simulator_flush(vc5, &job->submit, job); +#endif + static bool warned = false; + if (ret && !warned) { + fprintf(stderr, "Draw call returned %s. 
" + "Expect corruption.\n", strerror(errno)); + warned = true; + } + } + +done: + vc5_job_free(vc5, job); +} + +static bool +vc5_job_compare(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct vc5_job_key)) == 0; +} + +static uint32_t +vc5_job_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct vc5_job_key)); +} + +void +vc5_job_init(struct vc5_context *vc5) +{ + vc5->jobs = _mesa_hash_table_create(vc5, + vc5_job_hash, + vc5_job_compare); + vc5->write_jobs = _mesa_hash_table_create(vc5, + _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c new file mode 100644 index 00000000000..ce2e0be8ed2 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -0,0 +1,682 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/ralloc.h" +#include "util/hash_table.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "nir/tgsi_to_nir.h" +#include "compiler/v3d_compiler.h" +#include "v3d_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "mesa/state_tracker/st_glsl_types.h" + +static gl_varying_slot +vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) +{ + nir_foreach_variable(var, &s->outputs) { + if (var->data.driver_location == driver_location) { + return var->data.location; + } + } + + return -1; +} + +/** + * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. + * + * A shader can have 16 of these specs, and each one of them can write up to + * 16 dwords. Since we allow a total of 64 transform feedback output + * components (not 16 vectors), we have to group the writes of multiple + * varyings together in a single data spec. + */ +static void +vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, + const struct pipe_stream_output_info *stream_output) +{ + if (!stream_output->num_outputs) + return; + + struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; + int slot_count = 0; + + for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { + uint32_t buffer_offset = 0; + uint32_t vpm_start = slot_count; + + for (int i = 0; i < stream_output->num_outputs; i++) { + const struct pipe_stream_output *output = + &stream_output->output[i]; + + if (output->output_buffer != buffer) + continue; + + /* We assume that the SO outputs appear in increasing + * order in the buffer. 
+ */ + assert(output->dst_offset >= buffer_offset); + + /* Pad any undefined slots in the output */ + for (int j = buffer_offset; j < output->dst_offset; j++) { + slots[slot_count] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); + slot_count++; + buffer_offset++; + } + + /* Set the coordinate shader up to output the + * components of this varying. + */ + for (int j = 0; j < output->num_components; j++) { + gl_varying_slot slot = + vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index); + + slots[slot_count] = + v3d_slot_from_slot_and_component(slot, + output->start_component + j); + slot_count++; + buffer_offset++; + } + } + + uint32_t vpm_size = slot_count - vpm_start; + if (!vpm_size) + continue; + + uint32_t vpm_start_offset = vpm_start + 6; + + while (vpm_size) { + uint32_t write_size = MIN2(vpm_size, 1 << 4); + + struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + /* We need the offset from the coordinate shader's VPM + * output block, which has the [X, Y, Z, W, Xs, Ys] + * values at the start. + */ + .first_shaded_vertex_value_to_output = vpm_start_offset, + .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1, + .output_buffer_to_write_to = buffer, + }; + + /* GFXH-1559 */ + assert(unpacked.first_shaded_vertex_value_to_output != 8 || + so->num_tf_specs != 0); + + assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs[so->num_tf_specs], + &unpacked); + + /* If point size is being written by the shader, then + * all the VPM start offsets are shifted up by one. + * We won't know that until the variant is compiled, + * though. 
+ */ + unpacked.first_shaded_vertex_value_to_output++; + + /* GFXH-1559 */ + assert(unpacked.first_shaded_vertex_value_to_output != 8 || + so->num_tf_specs != 0); + + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs_psiz[so->num_tf_specs], + &unpacked); + so->num_tf_specs++; + vpm_start_offset += write_size; + vpm_size -= write_size; + } + } + + so->num_tf_outputs = slot_count; + so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, + slot_count); + memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); +} + +static int +type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static int +uniforms_type_size(const struct glsl_type *type) +{ + return st_glsl_storage_type_size(type, false); +} + +static void * +vc5_shader_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader); + if (!so) + return NULL; + + so->program_id = vc5->next_uncompiled_program_id++; + + nir_shader *s; + + if (cso->type == PIPE_SHADER_IR_NIR) { + /* The backend takes ownership of the NIR shader on state + * creation. 
+ */ + s = cso->ir.nir; + + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, + (nir_lower_io_options)0); + NIR_PASS_V(s, nir_lower_io, nir_var_uniform, + uniforms_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + if (V3D_DEBUG & V3D_DEBUG_TGSI) { + fprintf(stderr, "prog %d TGSI:\n", + so->program_id); + tgsi_dump(cso->tokens, 0); + fprintf(stderr, "\n"); + } + s = tgsi_to_nir(cso->tokens, &v3d_nir_options); + + so->was_tgsi = true; + } + + NIR_PASS_V(s, nir_opt_global_to_local); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + NIR_PASS_V(s, nir_normalize_cubemap_coords); + + NIR_PASS_V(s, nir_lower_load_const_to_scalar); + + v3d_optimize_nir(s); + + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + + /* Garbage collect dead instructions */ + nir_sweep(s); + + so->base.type = PIPE_SHADER_IR_NIR; + so->base.ir.nir = s; + + vc5_set_transform_feedback_outputs(so, &cso->stream_output); + + if (V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage(s->info.stage))) { + fprintf(stderr, "%s prog %d NIR:\n", + gl_shader_stage_name(s->info.stage), + so->program_id); + nir_print_shader(s, stderr); + fprintf(stderr, "\n"); + } + + return so; +} + +static struct vc5_compiled_shader * +vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key) +{ + struct vc5_uncompiled_shader *shader_state = key->shader_state; + nir_shader *s = shader_state->base.ir.nir; + + struct hash_table *ht; + uint32_t key_size; + if (s->info.stage == MESA_SHADER_FRAGMENT) { + ht = vc5->fs_cache; + key_size = sizeof(struct v3d_fs_key); + } else { + ht = vc5->vs_cache; + key_size = sizeof(struct v3d_vs_key); + } + + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + if (entry) + return entry->data; + + struct vc5_compiled_shader *shader = + rzalloc(NULL, struct vc5_compiled_shader); + + int program_id = shader_state->program_id; + int variant_id = + 
p_atomic_inc_return(&shader_state->compiled_variant_count); + uint64_t *qpu_insts; + uint32_t shader_size; + + switch (s->info.stage) { + case MESA_SHADER_VERTEX: + shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); + + qpu_insts = v3d_compile_vs(vc5->screen->compiler, + (struct v3d_vs_key *)key, + shader->prog_data.vs, s, + program_id, variant_id, + &shader_size); + break; + case MESA_SHADER_FRAGMENT: + shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); + + qpu_insts = v3d_compile_fs(vc5->screen->compiler, + (struct v3d_fs_key *)key, + shader->prog_data.fs, s, + program_id, variant_id, + &shader_size); + break; + default: + unreachable("bad stage"); + } + + vc5_set_shader_uniform_dirty_flags(shader); + + shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader"); + vc5_bo_map(shader->bo); + memcpy(shader->bo->map, qpu_insts, shader_size); + + free(qpu_insts); + + struct vc5_key *dup_key; + dup_key = ralloc_size(shader, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, shader); + + if (shader->prog_data.base->spill_size > + vc5->prog.spill_size_per_thread) { + /* Max 4 QPUs per slice, 3 slices per core. We only do single + * core so far. This overallocates memory on smaller cores. 
+ */ + int total_spill_size = + 4 * 3 * shader->prog_data.base->spill_size; + + vc5_bo_unreference(&vc5->prog.spill_bo); + vc5->prog.spill_bo = vc5_bo_alloc(vc5->screen, + total_spill_size, "spill"); + vc5->prog.spill_size_per_thread = + shader->prog_data.base->spill_size; + } + + return shader; +} + +static void +vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key, + struct vc5_texture_stateobj *texstate) +{ + const struct v3d_device_info *devinfo = &vc5->screen->devinfo; + + for (int i = 0; i < texstate->num_textures; i++) { + struct pipe_sampler_view *sampler = texstate->textures[i]; + struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler); + struct pipe_sampler_state *sampler_state = + texstate->samplers[i]; + + if (!sampler) + continue; + + key->tex[i].return_size = + vc5_get_tex_return_size(devinfo, + sampler->format, + sampler_state->compare_mode); + + /* For 16-bit, we set up the sampler to always return 2 + * channels (meaning no recompiles for most statechanges), + * while for 32 we actually scale the returns with channels. + */ + if (key->tex[i].return_size == 16) { + key->tex[i].return_channels = 2; + } else if (devinfo->ver > 40) { + key->tex[i].return_channels = 4; + } else { + key->tex[i].return_channels = + vc5_get_tex_return_channels(devinfo, + sampler->format); + } + + if (key->tex[i].return_size == 32 && devinfo->ver < 40) { + memcpy(key->tex[i].swizzle, + vc5_sampler->swizzle, + sizeof(vc5_sampler->swizzle)); + } else { + /* For 16-bit returns, we let the sampler state handle + * the swizzle. 
+ */ + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } + + if (sampler) { + key->tex[i].compare_mode = sampler_state->compare_mode; + key->tex[i].compare_func = sampler_state->compare_func; + key->tex[i].clamp_s = + sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; + key->tex[i].clamp_t = + sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP; + key->tex[i].clamp_r = + sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; + } + } + + key->ucp_enables = vc5->rasterizer->base.clip_plane_enable; +} + +static void +vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct vc5_job *job = vc5->job; + struct v3d_fs_key local_key; + struct v3d_fs_key *key = &local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_BLEND | + VC5_DIRTY_FRAMEBUFFER | + VC5_DIRTY_ZSA | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_SAMPLE_MASK | + VC5_DIRTY_FRAGTEX | + VC5_DIRTY_UNCOMPILED_FS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex); + key->base.shader_state = vc5->prog.bind_fs; + key->is_points = (prim_mode == PIPE_PRIM_POINTS); + key->is_lines = (prim_mode >= PIPE_PRIM_LINES && + prim_mode <= PIPE_PRIM_LINE_STRIP); + key->clamp_color = vc5->rasterizer->base.clamp_fragment_color; + if (vc5->blend->logicop_enable) { + key->logicop_func = vc5->blend->logicop_func; + } else { + key->logicop_func = PIPE_LOGICOP_COPY; + } + if (job->msaa) { + key->msaa = vc5->rasterizer->base.multisample; + key->sample_coverage = (vc5->rasterizer->base.multisample && + vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage; + key->sample_alpha_to_one = vc5->blend->alpha_to_one; + } + + key->depth_enabled = (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled); + if (vc5->zsa->base.alpha.enabled) { + key->alpha_test = true; + key->alpha_test_func = 
vc5->zsa->base.alpha.func; + } + + /* gl_FragColor's propagation to however many bound color buffers + * there are means that the buffer count needs to be in the key. + */ + key->nr_cbufs = vc5->framebuffer.nr_cbufs; + key->swap_color_rb = vc5->swap_color_rb; + + for (int i = 0; i < key->nr_cbufs; i++) { + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + if (!cbuf) + continue; + + const struct util_format_description *desc = + util_format_description(cbuf->format); + + if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + desc->channel[0].size == 32) { + key->f32_color_rb |= 1 << i; + } + + if (vc5->prog.bind_fs->was_tgsi) { + if (util_format_is_pure_uint(cbuf->format)) + key->uint_color_rb |= 1 << i; + else if (util_format_is_pure_sint(cbuf->format)) + key->int_color_rb |= 1 << i; + } + } + + if (key->is_points) { + key->point_sprite_mask = + vc5->rasterizer->base.sprite_coord_enable; + key->point_coord_upper_left = + (vc5->rasterizer->base.sprite_coord_mode == + PIPE_SPRITE_COORD_UPPER_LEFT); + } + + key->light_twoside = vc5->rasterizer->base.light_twoside; + key->shade_model_flat = vc5->rasterizer->base.flatshade; + + struct vc5_compiled_shader *old_fs = vc5->prog.fs; + vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base); + if (vc5->prog.fs == old_fs) + return; + + vc5->dirty |= VC5_DIRTY_COMPILED_FS; + + if (old_fs) { + if (vc5->prog.fs->prog_data.fs->flat_shade_flags != + old_fs->prog_data.fs->flat_shade_flags) { + vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; + } + + if (vc5->prog.fs->prog_data.fs->centroid_flags != + old_fs->prog_data.fs->centroid_flags) { + vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS; + } + } + + if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots, + old_fs->prog_data.fs->input_slots, + sizeof(vc5->prog.fs->prog_data.fs->input_slots))) { + vc5->dirty |= VC5_DIRTY_FS_INPUTS; + } +} + +static void +vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode) +{ + struct v3d_vs_key local_key; + struct v3d_vs_key *key = 
&local_key; + + if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_VERTTEX | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_UNCOMPILED_VS | + VC5_DIRTY_FS_INPUTS))) { + return; + } + + memset(key, 0, sizeof(*key)); + vc5_setup_shared_key(vc5, &key->base, &vc5->verttex); + key->base.shader_state = vc5->prog.bind_vs; + key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs; + STATIC_ASSERT(sizeof(key->fs_inputs) == + sizeof(vc5->prog.fs->prog_data.fs->input_slots)); + memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots, + sizeof(key->fs_inputs)); + key->clamp_color = vc5->rasterizer->base.clamp_vertex_color; + + key->per_vertex_point_size = + (prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + struct vc5_compiled_shader *vs = + vc5_get_compiled_shader(vc5, &key->base); + if (vs != vc5->prog.vs) { + vc5->prog.vs = vs; + vc5->dirty |= VC5_DIRTY_COMPILED_VS; + } + + key->is_coord = true; + /* Coord shaders only output varyings used by transform feedback. 
*/ + struct vc5_uncompiled_shader *shader_state = key->base.shader_state; + memcpy(key->fs_inputs, shader_state->tf_outputs, + sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_fs_inputs) { + memset(&key->fs_inputs[shader_state->num_tf_outputs], + 0, + sizeof(*key->fs_inputs) * (key->num_fs_inputs - + shader_state->num_tf_outputs)); + } + key->num_fs_inputs = shader_state->num_tf_outputs; + + struct vc5_compiled_shader *cs = + vc5_get_compiled_shader(vc5, &key->base); + if (cs != vc5->prog.cs) { + vc5->prog.cs = cs; + vc5->dirty |= VC5_DIRTY_COMPILED_CS; + } +} + +void +vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode) +{ + vc5_update_compiled_fs(vc5, prim_mode); + vc5_update_compiled_vs(vc5, prim_mode); +} + +static uint32_t +fs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); +} + +static uint32_t +vs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); +} + +static bool +fs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; +} + +static bool +vs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; +} + +static void +delete_from_cache_if_matches(struct hash_table *ht, + struct vc5_compiled_shader **last_compile, + struct hash_entry *entry, + struct vc5_uncompiled_shader *so) +{ + const struct v3d_key *key = entry->key; + + if (key->shader_state == so) { + struct vc5_compiled_shader *shader = entry->data; + _mesa_hash_table_remove(ht, entry); + vc5_bo_unreference(&shader->bo); + + if (shader == *last_compile) + *last_compile = NULL; + + ralloc_free(shader); + } +} + +static void +vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_uncompiled_shader *so = hwcso; + + struct hash_entry *entry; + 
hash_table_foreach(vc5->fs_cache, entry) { + delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs, + entry, so); + } + hash_table_foreach(vc5->vs_cache, entry) { + /* vs_cache holds both the vertex shader and the coordinate + * shader variants, so clear whichever "current" pointer refers + * to the entry that is about to be deleted. + */ + struct vc5_compiled_shader **last_compile = + entry->data == vc5->prog.cs ? + &vc5->prog.cs : &vc5->prog.vs; + + delete_from_cache_if_matches(vc5->vs_cache, last_compile, + entry, so); + } + + ralloc_free(so->base.ir.nir); + free(so); +} + +/* Binds the fragment shader CSO and marks VC5_DIRTY_UNCOMPILED_FS. */ +static void +vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_fs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS; +} + +/* Binds the vertex shader CSO and marks VC5_DIRTY_UNCOMPILED_VS. */ +static void +vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->prog.bind_vs = hwcso; + vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS; +} + +/* Installs the shader-state callbacks and creates the FS/VS variant caches. */ +void +vc5_program_init(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + pctx->create_vs_state = vc5_shader_state_create; + pctx->delete_vs_state = vc5_shader_state_delete; + + pctx->create_fs_state = vc5_shader_state_create; + pctx->delete_fs_state = vc5_shader_state_delete; + + pctx->bind_fs_state = vc5_fp_state_bind; + pctx->bind_vs_state = vc5_vp_state_bind; + + vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, + fs_cache_compare); + vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, + vs_cache_compare); +} + +/* Releases every compiled shader variant still held in the FS and VS caches. */ +void +vc5_program_fini(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + struct hash_entry *entry; + hash_table_foreach(vc5->fs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->fs_cache, entry); + } + + hash_table_foreach(vc5->vs_cache, entry) { + struct vc5_compiled_shader *shader = entry->data; + vc5_bo_unreference(&shader->bo); + ralloc_free(shader); + _mesa_hash_table_remove(vc5->vs_cache, entry); + } +} diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c new file mode 100644 index 00000000000..f645544bedf --- /dev/null +++
b/src/gallium/drivers/v3d/v3d_query.c @@ -0,0 +1,180 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * Gallium query object support. + * + * The HW has native support for occlusion queries, with the query result + * being loaded and stored by the TLB unit. From a SW perspective, we have to + * be careful to make sure that the jobs that need to be tracking queries are + * bracketed by the start and end of counting, even across FBO transitions. + * + * For the transform feedback PRIMITIVES_GENERATED/WRITTEN queries, we have to + * do the calculations in software at draw time. 
+ */ + +#include "v3d_context.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +struct vc5_query +{ + enum pipe_query_type type; + /* Result buffer; only allocated for the non-primitive-count queries. */ + struct vc5_bo *bo; + + /* Counter snapshots taken at begin/end for the primitive-count queries. */ + uint32_t start, end; +}; + +/** + * Allocates a zero-initialized query of the requested type. + * + * Returns NULL when the allocation fails. + */ +static struct pipe_query * +vc5_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) +{ + struct vc5_query *q = calloc(1, sizeof(*q)); + if (!q) + return NULL; + + q->type = query_type; + + /* Note that struct pipe_query isn't actually defined anywhere. */ + return (struct pipe_query *)q; +} + +/* Drops the query's BO reference (if any) and frees the query. */ +static void +vc5_destroy_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_query *q = (struct vc5_query *)query; + + vc5_bo_unreference(&q->bo); + free(q); +} + +/** + * Starts a query: snapshots the software counter for the primitive-count + * queries, or allocates and zeroes a result BO and makes it the current + * occlusion query BO for everything else. + * + * Returns false if the result BO could not be allocated or mapped. + */ +static boolean +vc5_begin_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_query *q = (struct vc5_query *)query; + + switch (q->type) { + case PIPE_QUERY_PRIMITIVES_GENERATED: + q->start = vc5->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->start = vc5->tf_prims_generated; + break; + default: + q->bo = vc5_bo_alloc(vc5->screen, 4096, "query"); + if (!q->bo) + return false; + + uint32_t *map = vc5_bo_map(q->bo); + if (!map) { + vc5_bo_unreference(&q->bo); + return false; + } + *map = 0; + vc5->current_oq = q->bo; + vc5->dirty |= VC5_DIRTY_OQ; + break; + } + + return true; +} + +/** + * Ends a query: snapshots the software counter for the primitive-count + * queries, or clears the current occlusion query BO for everything else. + */ +static bool +vc5_end_query(struct pipe_context *pctx, struct pipe_query *query) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_query *q = (struct vc5_query *)query; + + switch (q->type) { + case PIPE_QUERY_PRIMITIVES_GENERATED: + q->end = vc5->prims_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + q->end = vc5->tf_prims_generated; + break; + default: + vc5->current_oq = NULL; + vc5->dirty |= VC5_DIRTY_OQ; + break; + } + + return true; +} + +static boolean +vc5_get_query_result(struct pipe_context *pctx, struct pipe_query *query, + boolean wait, union pipe_query_result *vresult) +{ + struct vc5_query *q = (struct vc5_query *)query; + uint32_t result = 0; + + if (q->bo) { + /* XXX: Only flush the jobs using this
BO. */ + vc5_flush(pctx); + + /* Block until the BO is idle when the caller asked to wait; + * otherwise only poll, and report "not ready" if it is + * still busy. + */ + if (wait) { + if (!vc5_bo_wait(q->bo, ~0ull, "query")) + return false; + } else { + if (!vc5_bo_wait(q->bo, 0, "query")) + return false; + } + + /* XXX: Sum up per-core values. */ + uint32_t *map = vc5_bo_map(q->bo); + result = *map; + + vc5_bo_unreference(&q->bo); + } + + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + vresult->u64 = result; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + vresult->b = result != 0; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + vresult->u64 = q->end - q->start; + break; + default: + unreachable("unsupported query type"); + } + + return true; +} + +/* Records whether queries are active and re-emits OQ and streamout state. */ +static void +vc5_set_active_query_state(struct pipe_context *pctx, boolean enable) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->active_queries = enable; + vc5->dirty |= VC5_DIRTY_OQ; + vc5->dirty |= VC5_DIRTY_STREAMOUT; +} + +/* Installs the query callbacks on the context. */ +void +vc5_query_init(struct pipe_context *pctx) +{ + pctx->create_query = vc5_create_query; + pctx->destroy_query = vc5_destroy_query; + pctx->begin_query = vc5_begin_query; + pctx->end_query = vc5_end_query; + pctx->get_query_result = vc5_get_query_result; + pctx->set_active_query_state = vc5_set_active_query_state; +} + diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c new file mode 100644 index 00000000000..1cd3f1949a2 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_resource.c @@ -0,0 +1,914 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "util/u_blit.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_transfer_helper.h" +#include "util/u_upload_mgr.h" +#include "util/u_format_zs.h" + +#include "drm_fourcc.h" +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" +#include "v3d_tiling.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +static void +vc5_debug_resource_layout(struct vc5_resource *rsc, const char *caller) +{ + if (!(V3D_DEBUG & V3D_DEBUG_SURFACE)) + return; + + struct pipe_resource *prsc = &rsc->base; + + if (prsc->target == PIPE_BUFFER) { + fprintf(stderr, + "rsc %s %p (format %s), %dx%d buffer @0x%08x-0x%08x\n", + caller, rsc, + util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + rsc->bo->offset, + rsc->bo->offset + rsc->bo->size - 1); + return; + } + + static const char *const tiling_descriptions[] = { + [VC5_TILING_RASTER] = "R", + [VC5_TILING_LINEARTILE] = "LT", + [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1", + [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2", + [VC5_TILING_UIF_NO_XOR] = "UIF", + [VC5_TILING_UIF_XOR] = "UIF^", + }; + + for (int i = 0; i <= prsc->last_level; i++) { + struct vc5_resource_slice *slice = 
&rsc->slices[i]; + + int level_width = slice->stride / rsc->cpp; + int level_height = slice->padded_height; + int level_depth = + u_minify(util_next_power_of_two(prsc->depth0), i); + + fprintf(stderr, + "rsc %s %p (format %s), %dx%d: " + "level %d (%s) %dx%dx%d -> %dx%dx%d, stride %d@0x%08x\n", + caller, rsc, + util_format_short_name(prsc->format), + prsc->width0, prsc->height0, + i, tiling_descriptions[slice->tiling], + u_minify(prsc->width0, i), + u_minify(prsc->height0, i), + u_minify(prsc->depth0, i), + level_width, + level_height, + level_depth, + slice->stride, + rsc->bo->offset + slice->offset); + } +} + +static bool +vc5_resource_bo_alloc(struct vc5_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + struct pipe_screen *pscreen = prsc->screen; + struct vc5_bo *bo; + + bo = vc5_bo_alloc(vc5_screen(pscreen), rsc->size, "resource"); + if (bo) { + vc5_bo_unreference(&rsc->bo); + rsc->bo = bo; + vc5_debug_resource_layout(rsc, "alloc"); + return true; + } else { + return false; + } +} + +static void +vc5_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_transfer *trans = vc5_transfer(ptrans); + + if (trans->map) { + struct vc5_resource *rsc = vc5_resource(ptrans->resource); + struct vc5_resource_slice *slice = &rsc->slices[ptrans->level]; + + if (ptrans->usage & PIPE_TRANSFER_WRITE) { + for (int z = 0; z < ptrans->box.depth; z++) { + void *dst = rsc->bo->map + + vc5_layer_offset(&rsc->base, + ptrans->level, + ptrans->box.z + z); + vc5_store_tiled_image(dst, + slice->stride, + (trans->map + + ptrans->stride * + ptrans->box.height * z), + ptrans->stride, + slice->tiling, rsc->cpp, + slice->padded_height, + &ptrans->box); + } + } + free(trans->map); + } + + pipe_resource_reference(&ptrans->resource, NULL); + slab_free(&vc5->transfer_pool, ptrans); +} + +static void * +vc5_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned 
level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_transfer *trans; + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + /* MSAA maps should have been handled by u_transfer_helper. */ + assert(prsc->nr_samples <= 1); + + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. + */ + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && + prsc->last_level == 0 && + prsc->width0 == box->width && + prsc->height0 == box->height && + prsc->depth0 == box->depth && + prsc->array_size == 1 && + rsc->bo->private) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + if (vc5_resource_bo_alloc(rsc)) { + /* If it might be bound as one of our vertex buffers + * or UBOs, make sure we re-emit vertex buffer state + * or uniforms. + */ + if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) + vc5->dirty |= VC5_DIRTY_VTXBUF; + if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER) + vc5->dirty |= VC5_DIRTY_CONSTBUF; + } else { + /* If we failed to reallocate, flush users so that we + * don't violate any syncing requirements. + */ + vc5_flush_jobs_reading_resource(vc5, prsc); + } + } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + /* If we're writing and the buffer is being used by the CL, we + * have to flush the CL first. If we're only reading, we need + * to flush if the CL has written our buffer. 
+ */ + if (usage & PIPE_TRANSFER_WRITE) + vc5_flush_jobs_reading_resource(vc5, prsc); + else + vc5_flush_jobs_writing_resource(vc5, prsc); + } + + if (usage & PIPE_TRANSFER_WRITE) { + rsc->writes++; + rsc->initialized_buffers = ~0; + } + + trans = slab_alloc(&vc5->transfer_pool); + if (!trans) + return NULL; + + /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ + + /* slab_alloc_st() doesn't zero: */ + memset(trans, 0, sizeof(*trans)); + ptrans = &trans->base; + + pipe_resource_reference(&ptrans->resource, prsc); + ptrans->level = level; + ptrans->usage = usage; + ptrans->box = *box; + + /* Note that the current kernel implementation is synchronous, so no + * need to do syncing stuff here yet. + */ + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = vc5_bo_map_unsynchronized(rsc->bo); + else + buf = vc5_bo_map(rsc->bo); + if (!buf) { + fprintf(stderr, "Failed to map bo\n"); + goto fail; + } + + *pptrans = ptrans; + + /* Our load/store routines work on entire compressed blocks. */ + ptrans->box.x /= util_format_get_blockwidth(format); + ptrans->box.y /= util_format_get_blockheight(format); + ptrans->box.width = DIV_ROUND_UP(ptrans->box.width, + util_format_get_blockwidth(format)); + ptrans->box.height = DIV_ROUND_UP(ptrans->box.height, + util_format_get_blockheight(format)); + + struct vc5_resource_slice *slice = &rsc->slices[level]; + if (rsc->tiled) { + /* No direct mappings of tiled, since we need to manually + * tile/untile. 
+ */ + if (usage & PIPE_TRANSFER_MAP_DIRECTLY) + goto fail; + + ptrans->stride = ptrans->box.width * rsc->cpp; + ptrans->layer_stride = ptrans->stride * ptrans->box.height; + + trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); + if (!trans->map) + goto fail; + + if (usage & PIPE_TRANSFER_READ) { + for (int z = 0; z < ptrans->box.depth; z++) { + void *src = rsc->bo->map + + vc5_layer_offset(&rsc->base, + ptrans->level, + ptrans->box.z + z); + vc5_load_tiled_image((trans->map + + ptrans->stride * + ptrans->box.height * z), + ptrans->stride, + src, + slice->stride, + slice->tiling, rsc->cpp, + slice->padded_height, + &ptrans->box); + } + } + return trans->map; + } else { + ptrans->stride = slice->stride; + ptrans->layer_stride = ptrans->stride; + + return buf + slice->offset + + ptrans->box.y * ptrans->stride + + ptrans->box.x * rsc->cpp + + ptrans->box.z * rsc->cube_map_stride; + } + + +fail: + /* Common failure path: the unmap helper drops the resource reference + * and returns the transfer to the slab, so don't leave the caller + * holding a pointer to the freed transfer. + */ + *pptrans = NULL; + vc5_resource_transfer_unmap(pctx, ptrans); + return NULL; +} + +/* Drops the resource's BO reference and frees the resource. */ +static void +vc5_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + vc5_bo_unreference(&rsc->bo); + free(rsc); +} + +/* Fills in a winsys handle describing the resource's BO. */ +static boolean +vc5_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *pctx, + struct pipe_resource *prsc, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_bo *bo = rsc->bo; + + whandle->stride = rsc->slices[0].stride; + + /* If we're passing some reference to our BO out to some other part of + * the system, then we can't do any optimizations about only us being + * the ones seeing it (like BO caching).
+ */ + bo->private = false; + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + return vc5_bo_flink(bo, &whandle->handle); + case DRM_API_HANDLE_TYPE_KMS: + whandle->handle = bo->handle; + return TRUE; + case DRM_API_HANDLE_TYPE_FD: + whandle->handle = vc5_bo_get_dmabuf(bo); + return whandle->handle != -1; + } + + return FALSE; +} +/* Sizes of a page and of the page cache, counted in rows of UIF blocks. */ +#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1) +#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5) + +/** + * Computes the HW's UIFblock padding for a given height/cpp. + * + * The goal of the padding is to keep pages of the same color (bank number) at + * least half a page away from each other vertically when crossing between + * columns of UIF blocks. + * + * The height is converted to rows of UIF blocks (two utiles tall) before any + * comparison, and the returned padding is likewise a number of UIF-block + * rows (0 when no padding is needed). + */ +static uint32_t +vc5_get_ub_pad(struct vc5_resource *rsc, uint32_t height) +{ + uint32_t utile_h = vc5_utile_height(rsc->cpp); + uint32_t uif_block_h = utile_h * 2; + uint32_t height_ub = height / uif_block_h; + + uint32_t height_offset_in_pc = height_ub % PAGE_CACHE_UB_ROWS; + + /* For the perfectly-aligned-for-UIF-XOR case, don't add any pad. */ + if (height_offset_in_pc == 0) + return 0; + + /* Try padding up to where we're offset by at least half a page. */ + if (height_offset_in_pc < PAGE_UB_ROWS_TIMES_1_5) { + /* If we fit entirely in the page cache, don't pad. */ + if (height_ub < PAGE_CACHE_UB_ROWS) + return 0; + else + return PAGE_UB_ROWS_TIMES_1_5 - height_offset_in_pc; + } + + /* If we're close to being aligned to page cache size, then round up + * and rely on XOR. + */ + if (height_offset_in_pc > PAGE_CACHE_MINUS_1_5_UB_ROWS) + return PAGE_CACHE_UB_ROWS - height_offset_in_pc; + + /* Otherwise, we're far enough away (top and bottom) to not need any + * padding.
+ */ + return 0; +} + +static void +vc5_setup_slices(struct vc5_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + /* Note that power-of-two padding is based on level 1. These are not + * equivalent to just util_next_power_of_two(dimension), because at a + * level 0 dimension of 9, the level 1 power-of-two padded value is 4, + * not 8. + */ + uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1)); + uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1)); + uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1)); + uint32_t offset = 0; + uint32_t utile_w = vc5_utile_width(rsc->cpp); + uint32_t utile_h = vc5_utile_height(rsc->cpp); + uint32_t uif_block_w = utile_w * 2; + uint32_t uif_block_h = utile_h * 2; + uint32_t block_width = util_format_get_blockwidth(prsc->format); + uint32_t block_height = util_format_get_blockheight(prsc->format); + bool msaa = prsc->nr_samples > 1; + /* MSAA textures/renderbuffers are always laid out as single-level + * UIF. 
+ */ + bool uif_top = msaa; + + for (int i = prsc->last_level; i >= 0; i--) { + struct vc5_resource_slice *slice = &rsc->slices[i]; + + uint32_t level_width, level_height, level_depth; + if (i < 2) { + level_width = u_minify(width, i); + level_height = u_minify(height, i); + } else { + level_width = u_minify(pot_width, i); + level_height = u_minify(pot_height, i); + } + if (i < 1) + level_depth = u_minify(depth, i); + else + level_depth = u_minify(pot_depth, i); + + if (msaa) { + level_width *= 2; + level_height *= 2; + } + + level_width = DIV_ROUND_UP(level_width, block_width); + level_height = DIV_ROUND_UP(level_height, block_height); + + if (!rsc->tiled) { + slice->tiling = VC5_TILING_RASTER; + if (prsc->target == PIPE_TEXTURE_1D) + level_width = align(level_width, 64 / rsc->cpp); + } else { + if ((i != 0 || !uif_top) && + (level_width <= utile_w || + level_height <= utile_h)) { + slice->tiling = VC5_TILING_LINEARTILE; + level_width = align(level_width, utile_w); + level_height = align(level_height, utile_h); + } else if ((i != 0 || !uif_top) && + level_width <= uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; + level_width = align(level_width, uif_block_w); + level_height = align(level_height, uif_block_h); + } else if ((i != 0 || !uif_top) && + level_width <= 2 * uif_block_w) { + slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; + level_width = align(level_width, 2 * uif_block_w); + level_height = align(level_height, uif_block_h); + } else { + /* We align the width to a 4-block column of + * UIF blocks, but we only align height to UIF + * blocks. 
+ */ + level_width = align(level_width, + 4 * uif_block_w); + level_height = align(level_height, + uif_block_h); + + slice->ub_pad = vc5_get_ub_pad(rsc, + level_height); + level_height += slice->ub_pad * uif_block_h; + + /* If the padding set us to be aligned to + * the page cache size, then the HW will use + * the XOR bit on odd columns to get us + * perfectly misaligned + */ + if ((level_height / uif_block_h) % + (VC5_PAGE_CACHE_SIZE / + VC5_UIFBLOCK_ROW_SIZE) == 0) { + slice->tiling = VC5_TILING_UIF_XOR; + } else { + slice->tiling = VC5_TILING_UIF_NO_XOR; + } + } + } + + slice->offset = offset; + slice->stride = level_width * rsc->cpp; + slice->padded_height = level_height; + slice->size = level_height * slice->stride; + + uint32_t slice_total_size = slice->size * level_depth; + + /* The HW aligns level 1's base to a page if any of level 1 or + * below could be UIF XOR. The lower levels then inherit the + * alignment for as long as necessary, thanks to being power of + * two aligned. + */ + if (i == 1 && + level_width > 4 * uif_block_w && + level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) { + slice_total_size = align(slice_total_size, + VC5_UIFCFG_PAGE_SIZE); + } + + offset += slice_total_size; + + } + rsc->size = offset; + + /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only + * needs to be aligned to utile boundaries. Since tiles are laid out + * from small to big in memory, we need to align the later UIF slices + * to UIF blocks, if they were preceded by non-UIF-block-aligned LT + * slices. + * + * We additionally align to 4k, which improves UIF XOR performance. 
+ */ + uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) - + rsc->slices[0].offset); + if (page_align_offset) { + rsc->size += page_align_offset; + for (int i = 0; i <= prsc->last_level; i++) + rsc->slices[i].offset += page_align_offset; + } + + /* Arrays and cube textures have a stride which is the distance from + * one full mipmap tree to the next (64b aligned). For 3D textures, + * we need to program the stride between slices of miplevel 0. + */ + if (prsc->target != PIPE_TEXTURE_3D) { + rsc->cube_map_stride = align(rsc->slices[0].offset + + rsc->slices[0].size, 64); + rsc->size += rsc->cube_map_stride * (prsc->array_size - 1); + } else { + rsc->cube_map_stride = rsc->slices[0].size; + } +} + +uint32_t +vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + struct vc5_resource_slice *slice = &rsc->slices[level]; + + if (prsc->target == PIPE_TEXTURE_3D) + return slice->offset + layer * slice->size; + else + return slice->offset + layer * rsc->cube_map_stride; +} + +static struct vc5_resource * +vc5_resource_setup(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource); + if (!rsc) + return NULL; + struct pipe_resource *prsc = &rsc->base; + + *prsc = *tmpl; + + pipe_reference_init(&prsc->reference, 1); + prsc->screen = pscreen; + + if (prsc->nr_samples <= 1 || + screen->devinfo.ver >= 40 || + util_format_is_depth_or_stencil(prsc->format)) { + rsc->cpp = util_format_get_blocksize(prsc->format); + if (screen->devinfo.ver < 40 && prsc->nr_samples > 1) + rsc->cpp *= prsc->nr_samples; + } else { + assert(vc5_rt_format_supported(&screen->devinfo, prsc->format)); + uint32_t output_image_format = + vc5_get_rt_format(&screen->devinfo, prsc->format); + uint32_t internal_type; + uint32_t internal_bpp; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + 
output_image_format, + &internal_type, + &internal_bpp); + switch (internal_bpp) { + case V3D_INTERNAL_BPP_32: + rsc->cpp = 4; + break; + case V3D_INTERNAL_BPP_64: + rsc->cpp = 8; + break; + case V3D_INTERNAL_BPP_128: + rsc->cpp = 16; + break; + } + } + + assert(rsc->cpp); + + return rsc; +} + +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + +static struct pipe_resource * +vc5_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, + int count) +{ + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + /* Use a tiled layout if we can, for better 3D performance. */ + bool should_tile = true; + + /* VBOs/PBOs are untiled (and 1 height). */ + if (tmpl->target == PIPE_BUFFER) + should_tile = false; + + /* Cursors are always linear, and the user can request linear as well. + */ + if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) + should_tile = false; + + /* 1D and 1D_ARRAY textures are always raster-order. */ + if (tmpl->target == PIPE_TEXTURE_1D || + tmpl->target == PIPE_TEXTURE_1D_ARRAY) + should_tile = false; + + /* Scanout BOs for simulator need to be linear for interaction with + * i965. + */ + if (using_vc5_simulator && + tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) + should_tile = false; + + /* No user-specified modifier; determine our own. 
*/ + if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { + linear_ok = true; + rsc->tiled = should_tile; + } else if (should_tile && + find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, + modifiers, count)) { + rsc->tiled = true; + } else if (linear_ok) { + rsc->tiled = false; + } else { + fprintf(stderr, "Unsupported modifier requested\n"); + return NULL; + } + + rsc->internal_format = prsc->format; + + vc5_setup_slices(rsc); + if (!vc5_resource_bo_alloc(rsc)) + goto fail; + + return prsc; +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +struct pipe_resource * +vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + +static struct pipe_resource * +vc5_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + struct winsys_handle *whandle, + unsigned usage) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); + struct pipe_resource *prsc = &rsc->base; + struct vc5_resource_slice *slice = &rsc->slices[0]; + + if (!rsc) + return NULL; + + switch (whandle->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case DRM_FORMAT_MOD_INVALID: + rsc->tiled = false; + break; + /* XXX: UIF */ + default: + fprintf(stderr, + "Attempt to import unsupported modifier 0x%llx\n", + (long long)whandle->modifier); + goto fail; + } + + if (whandle->offset != 0) { + fprintf(stderr, + "Attempt to import unsupported winsys offset %u\n", + whandle->offset); + goto fail; + } + + switch (whandle->type) { + case DRM_API_HANDLE_TYPE_SHARED: + rsc->bo = vc5_bo_open_name(screen, + whandle->handle, whandle->stride); + break; + case DRM_API_HANDLE_TYPE_FD: + rsc->bo = vc5_bo_open_dmabuf(screen, + whandle->handle, whandle->stride); + break; + default: + fprintf(stderr, + "Attempt to import unsupported handle type %d\n", + whandle->type); + 
goto fail; + } + + if (!rsc->bo) + goto fail; + + rsc->internal_format = prsc->format; + + vc5_setup_slices(rsc); + vc5_debug_resource_layout(rsc, "import"); + + if (whandle->stride != slice->stride) { + static bool warned = false; + if (!warned) { + warned = true; + fprintf(stderr, + "Attempting to import %dx%d %s with " + "unsupported stride %d instead of %d\n", + prsc->width0, prsc->height0, + util_format_short_name(prsc->format), + whandle->stride, + slice->stride); + } + goto fail; + } + + return prsc; + +fail: + vc5_resource_destroy(pscreen, prsc); + return NULL; +} + +static struct pipe_surface * +vc5_create_surface(struct pipe_context *pctx, + struct pipe_resource *ptex, + const struct pipe_surface *surf_tmpl) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_screen *screen = vc5->screen; + struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface); + struct vc5_resource *rsc = vc5_resource(ptex); + + if (!surface) + return NULL; + + assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); + + struct pipe_surface *psurf = &surface->base; + unsigned level = surf_tmpl->u.tex.level; + struct vc5_resource_slice *slice = &rsc->slices[level]; + + pipe_reference_init(&psurf->reference, 1); + pipe_resource_reference(&psurf->texture, ptex); + + psurf->context = pctx; + psurf->format = surf_tmpl->format; + psurf->width = u_minify(ptex->width0, level); + psurf->height = u_minify(ptex->height0, level); + psurf->u.tex.level = level; + psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + + surface->offset = vc5_layer_offset(ptex, level, + psurf->u.tex.first_layer); + surface->tiling = slice->tiling; + + surface->format = vc5_get_rt_format(&screen->devinfo, psurf->format); + + if (util_format_is_depth_or_stencil(psurf->format)) { + switch (psurf->format) { + case PIPE_FORMAT_Z16_UNORM: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_16; + break; + case PIPE_FORMAT_Z32_FLOAT: + case 
PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_32F; + break; + default: + surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_24; + } + } else { + uint32_t bpp, type; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + surface->format, + &type, &bpp); + surface->internal_type = type; + surface->internal_bpp = bpp; + } + + if (surface->tiling == VC5_TILING_UIF_NO_XOR || + surface->tiling == VC5_TILING_UIF_XOR) { + surface->padded_height_of_output_image_in_uif_blocks = + (slice->padded_height / + (2 * vc5_utile_height(rsc->cpp))); + } + + if (rsc->separate_stencil) { + surface->separate_stencil = + vc5_create_surface(pctx, &rsc->separate_stencil->base, + surf_tmpl); + } + + return &surface->base; +} + +static void +vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) +{ + struct vc5_surface *surf = vc5_surface(psurf); + + if (surf->separate_stencil) + pipe_surface_reference(&surf->separate_stencil, NULL); + + pipe_resource_reference(&psurf->texture, NULL); + FREE(psurf); +} + +static void +vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) +{ + /* All calls to flush_resource are followed by a flush of the context, + * so there's nothing to do. 
+ */ +} + +static enum pipe_format +vc5_resource_get_internal_format(struct pipe_resource *prsc) +{ + return vc5_resource(prsc)->internal_format; +} + +static void +vc5_resource_set_stencil(struct pipe_resource *prsc, + struct pipe_resource *stencil) +{ + vc5_resource(prsc)->separate_stencil = vc5_resource(stencil); +} + +static struct pipe_resource * +vc5_resource_get_stencil(struct pipe_resource *prsc) +{ + struct vc5_resource *rsc = vc5_resource(prsc); + + return &rsc->separate_stencil->base; +} + +static const struct u_transfer_vtbl transfer_vtbl = { + .resource_create = vc5_resource_create, + .resource_destroy = vc5_resource_destroy, + .transfer_map = vc5_resource_transfer_map, + .transfer_unmap = vc5_resource_transfer_unmap, + .transfer_flush_region = u_default_transfer_flush_region, + .get_internal_format = vc5_resource_get_internal_format, + .set_stencil = vc5_resource_set_stencil, + .get_stencil = vc5_resource_get_stencil, +}; + +void +vc5_resource_screen_init(struct pipe_screen *pscreen) +{ + pscreen->resource_create_with_modifiers = + vc5_resource_create_with_modifiers; + pscreen->resource_create = u_transfer_helper_resource_create; + pscreen->resource_from_handle = vc5_resource_from_handle; + pscreen->resource_get_handle = vc5_resource_get_handle; + pscreen->resource_destroy = u_transfer_helper_resource_destroy; + pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, + true, true, true); +} + +void +vc5_resource_context_init(struct pipe_context *pctx) +{ + pctx->transfer_map = u_transfer_helper_transfer_map; + pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; + pctx->transfer_unmap = u_transfer_helper_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->create_surface = vc5_create_surface; + pctx->surface_destroy = vc5_surface_destroy; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = vc5_blit; + pctx->flush_resource = 
vc5_flush_resource; +} diff --git a/src/gallium/drivers/v3d/v3d_resource.h b/src/gallium/drivers/v3d/v3d_resource.h new file mode 100644 index 00000000000..dc68f803e90 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_resource.h @@ -0,0 +1,175 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_RESOURCE_H +#define VC5_RESOURCE_H + +#include "v3d_screen.h" +#include "util/u_transfer.h" + +/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These + * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB + * page. Those pages are then arranged left-to-right, top-to-bottom, to cover + * an image. + * + * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte + * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp). 
+ */ + +/** + * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory + * Format field of render target and Z/Stencil config. + */ +enum vc5_tiling_mode { + /* Untiled resources. Not valid as texture inputs. */ + VC5_TILING_RASTER, + + /* Single line of u-tiles. */ + VC5_TILING_LINEARTILE, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_1_COLUMN, + + /* Departure from standard 4-UIF block column format. */ + VC5_TILING_UBLINEAR_2_COLUMN, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_NO_XOR, + + /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is + * split 2x2 into utiles. + */ + VC5_TILING_UIF_XOR, +}; + +struct vc5_transfer { + struct pipe_transfer base; + void *map; +}; + +struct vc5_resource_slice { + uint32_t offset; + uint32_t stride; + uint32_t padded_height; + /* Size of a single pane of the slice. For 3D textures, there will be + * a number of panes equal to the minified, power-of-two-aligned + * depth. + */ + uint32_t size; + uint8_t ub_pad; + enum vc5_tiling_mode tiling; +}; + +struct vc5_surface { + struct pipe_surface base; + uint32_t offset; + enum vc5_tiling_mode tiling; + /** + * Output image format for TILE_RENDERING_MODE_CONFIGURATION + */ + uint8_t format; + + /** + * Internal format of the tile buffer for + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_type; + + /** + * internal bpp value (0=32bpp, 2=128bpp) for color buffers in + * TILE_RENDERING_MODE_CONFIGURATION. + */ + uint8_t internal_bpp; + + uint32_t padded_height_of_output_image_in_uif_blocks; + + /* If the resource being referenced is separate stencil, then this is + * the surface to use when reading/writing stencil. 
+ */ + struct pipe_surface *separate_stencil; +}; + +struct vc5_resource { + struct pipe_resource base; + struct vc5_bo *bo; + struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS]; + uint32_t cube_map_stride; + uint32_t size; + int cpp; + bool tiled; + + /** + * Number of times the resource has been written to. + * + * This is used to track whether we need to load the surface on first + * rendering. + */ + uint64_t writes; + + /** + * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL + * for which parts of the resource are defined. + * + * Used for avoiding fallback to quad clears for clearing just depth, + * when the stencil contents have never been initialized. Note that + * we're lazy and fields not present in the buffer (DEPTH in a color + * buffer) may get marked. + */ + uint32_t initialized_buffers; + + enum pipe_format internal_format; + + /* Resource storing the S8 part of a Z32F_S8 resource, or NULL. */ + struct vc5_resource *separate_stencil; +}; + +static inline struct vc5_resource * +vc5_resource(struct pipe_resource *prsc) +{ + return (struct vc5_resource *)prsc; +} + +static inline struct vc5_surface * +vc5_surface(struct pipe_surface *psurf) +{ + return (struct vc5_surface *)psurf; +} + +static inline struct vc5_transfer * +vc5_transfer(struct pipe_transfer *ptrans) +{ + return (struct vc5_transfer *)ptrans; +} + +void vc5_resource_screen_init(struct pipe_screen *pscreen); +void vc5_resource_context_init(struct pipe_context *pctx); +struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl); +uint32_t vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, + uint32_t layer); + + +#endif /* VC5_RESOURCE_H */ diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c new file mode 100644 index 00000000000..95e6a6907f4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -0,0 +1,648 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 
Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "os/os_misc.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_hash_table.h" +#include "util/ralloc.h" + +#include +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_resource.h" +#include "compiler/v3d_compiler.h" + +static const char * +vc5_screen_get_name(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (!screen->name) { + screen->name = ralloc_asprintf(screen, + "VC5 V3D %d.%d", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + } + + return screen->name; +} + +static const char * +vc5_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "Broadcom"; +} + +static void +vc5_screen_destroy(struct pipe_screen *pscreen) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + util_hash_table_destroy(screen->bo_handles); + vc5_bufmgr_destroy(pscreen); + slab_destroy_parent(&screen->transfer_pool); + + if (using_vc5_simulator) + vc5_simulator_destroy(screen); + + v3d_compiler_free(screen->compiler); + + close(screen->fd); + ralloc_free(pscreen); +} + +static int +vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + switch (param) { + /* Supported features (boolean caps). 
*/ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_SM3: + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: + return 1; + + case PIPE_CAP_INDEP_BLEND_ENABLE: + return screen->devinfo.ver >= 40; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 400; + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + if (screen->devinfo.ver >= 40) + return 0; + else + return 1; + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + if (screen->devinfo.ver >= 40) + return 1; + else + return 0; + + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + return 1; + + + /* Stream output. 
*/ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 64; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 7; + + /* Unsupported features. */ + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_CUBE_MAP_ARRAY: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_FAKE_SW_MSAA: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_MAX_VERTEX_STREAMS: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_CLIP_HALFZ: + case PIPE_CAP_VERTEXID_NOBASE: + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case 
PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + case PIPE_CAP_CULL_DISTANCE: + case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: + case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_MAX_WINDOW_RECTANGLES: + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_INT64: + case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_DOUBLES: + case PIPE_CAP_BINDLESS_TEXTURE: + case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_BALLOT: + case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: + case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: + case PIPE_CAP_TGSI_CLOCK: + case PIPE_CAP_TGSI_TEX_TXF_LZ: + case PIPE_CAP_NATIVE_FENCE_FD: + case PIPE_CAP_FENCE_SIGNAL: + case PIPE_CAP_TGSI_MUL_ZERO_WINS: + case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_MEMOBJ: + case PIPE_CAP_LOAD_CONSTBUF: + case PIPE_CAP_TILE_RASTER_ORDER: + case 
PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: + case PIPE_CAP_CONTEXT_PRIORITY_MASK: + case PIPE_CAP_CONSTBUF0_FLAGS: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: + case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: + case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: + case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: + case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: + case PIPE_CAP_PACKED_UNIFORMS: + return 0; + + /* Geometry shader output, unsupported. */ + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return VC5_MAX_MIP_LEVELS; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 2048; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + + /* Queries. 
*/ + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_TIMESTAMP: + return 0; + + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + + case PIPE_CAP_VENDOR_ID: + return 0x14E4; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 1; + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + + default: + fprintf(stderr, "unknown param %d\n", param); + return 0; + } +} + +static float +vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 32; + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 512.0f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; + default: + fprintf(stderr, "unknown paramf %d\n", param); + return 0; + } +} + +static int +vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, + enum pipe_shader_cap param) +{ + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) { + return 0; + } + + /* this is probably not totally correct.. 
but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return UINT_MAX; + + case PIPE_SHADER_CAP_MAX_INPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return VC5_MAX_FS_INPUTS / 4; + else + return 16; + case PIPE_SHADER_CAP_MAX_OUTPUTS: + if (shader == PIPE_SHADER_FRAGMENT) + return 4; + else + return VC5_MAX_FS_INPUTS / 4; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + return 0; + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + return VC5_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + case 
PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + return 0; + default: + fprintf(stderr, "unknown shader param %d\n", param); + return 0; + } + return 0; +} + +static boolean +vc5_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + struct vc5_screen *screen = vc5_screen(pscreen); + + if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + return FALSE; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + !util_format_is_supported(format, usage)) { + return FALSE; + } + + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case 
PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_B10G10R10A2_SNORM: + case PIPE_FORMAT_R10G10B10A2_USCALED: + case PIPE_FORMAT_B10G10R10A2_USCALED: + case PIPE_FORMAT_R10G10B10A2_SSCALED: + case PIPE_FORMAT_B10G10R10A2_SSCALED: + break; + default: + return FALSE; + } + } + + if ((usage & PIPE_BIND_RENDER_TARGET) && + !vc5_rt_format_supported(&screen->devinfo, format)) { + return FALSE; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + !vc5_tex_format_supported(&screen->devinfo, format)) { + return FALSE; + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + !(format == PIPE_FORMAT_S8_UINT_Z24_UNORM || + format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_Z16_UNORM || + format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { + return FALSE; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + !(format == PIPE_FORMAT_I8_UINT || + format == PIPE_FORMAT_I16_UINT || + format == PIPE_FORMAT_I32_UINT)) { + return FALSE; + } + + return TRUE; +} + +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + +static bool +vc5_get_device_info(struct vc5_screen *screen) +{ + struct drm_v3d_get_param ident0 = { + .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0, + }; + struct drm_v3d_get_param ident1 = { + .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1, + }; + int ret; + + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident0); + if (ret != 0) { + fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n", + strerror(errno)); + return false; + } + ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident1); + if (ret != 0) { + fprintf(stderr, 
"Couldn't get V3D core IDENT1: %s\n", + strerror(errno)); + return false; + } + + uint32_t major = (ident0.value >> 24) & 0xff; + uint32_t minor = (ident1.value >> 0) & 0xf; + screen->devinfo.ver = major * 10 + minor; + + switch (screen->devinfo.ver) { + case 33: + case 41: + case 42: + break; + default: + fprintf(stderr, + "V3D %d.%d not supported by this version of Mesa.\n", + screen->devinfo.ver / 10, + screen->devinfo.ver % 10); + return false; + } + + return true; +} + +static const void * +vc5_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, unsigned shader) +{ + return &v3d_nir_options; +} + +struct pipe_screen * +v3d_screen_create(int fd) +{ + struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen); + struct pipe_screen *pscreen; + + pscreen = &screen->base; + + pscreen->destroy = vc5_screen_destroy; + pscreen->get_param = vc5_screen_get_param; + pscreen->get_paramf = vc5_screen_get_paramf; + pscreen->get_shader_param = vc5_screen_get_shader_param; + pscreen->context_create = vc5_context_create; + pscreen->is_format_supported = vc5_screen_is_format_supported; + + screen->fd = fd; + list_inithead(&screen->bo_cache.time_list); + (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); + +#if defined(USE_V3D_SIMULATOR) + vc5_simulator_init(screen); +#endif + + if (!vc5_get_device_info(screen)) + goto fail; + + slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16); + + vc5_fence_init(screen); + + v3d_process_debug_variable(); + + vc5_resource_screen_init(pscreen); + + screen->compiler = v3d_compiler_init(&screen->devinfo); + + pscreen->get_name = vc5_screen_get_name; + pscreen->get_vendor = vc5_screen_get_vendor; + pscreen->get_device_vendor = vc5_screen_get_vendor; + pscreen->get_compiler_options = vc5_screen_get_compiler_options; + + return pscreen; + +fail: + close(fd); + ralloc_free(pscreen); + return NULL; +} diff --git 
a/src/gallium/drivers/v3d/v3d_screen.h b/src/gallium/drivers/v3d/v3d_screen.h new file mode 100644 index 00000000000..975bfe01a75 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_screen.h @@ -0,0 +1,101 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC5_SCREEN_H +#define VC5_SCREEN_H + +#include "pipe/p_screen.h" +#include "os/os_thread.h" +#include "state_tracker/drm_driver.h" +#include "util/list.h" +#include "util/slab.h" +#include "broadcom/common/v3d_debug.h" +#include "broadcom/common/v3d_device_info.h" + +struct vc5_bo; + +#define VC5_MAX_MIP_LEVELS 12 +#define VC5_MAX_TEXTURE_SAMPLERS 32 +#define VC5_MAX_SAMPLES 4 +#define VC5_MAX_DRAW_BUFFERS 4 +#define VC5_MAX_ATTRIBUTES 16 + +/* These are tunable parameters in the HW design, but all the V3D + * implementations agree. 
+ */ +#define VC5_UIFCFG_BANKS 8 +#define VC5_UIFCFG_PAGE_SIZE 4096 +#define VC5_UIFCFG_XOR_VALUE (1 << 4) +#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS) +#define VC5_UBLOCK_SIZE 64 +#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE) +#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE) + +struct vc5_simulator_file; + +struct vc5_screen { + struct pipe_screen base; + int fd; + + struct v3d_device_info devinfo; + + const char *name; + + struct slab_parent_pool transfer_pool; + + struct vc5_bo_cache { + /** List of struct vc5_bo freed, by age. */ + struct list_head time_list; + /** List of struct vc5_bo freed, per size, by age. */ + struct list_head *size_list; + uint32_t size_list_size; + + mtx_t lock; + + uint32_t bo_size; + uint32_t bo_count; + } bo_cache; + + const struct v3d_compiler *compiler; + + struct util_hash_table *bo_handles; + mtx_t bo_handles_mutex; + + uint32_t bo_size; + uint32_t bo_count; + + struct vc5_simulator_file *sim_file; +}; + +static inline struct vc5_screen * +vc5_screen(struct pipe_screen *screen) +{ + return (struct vc5_screen *)screen; +} + +struct pipe_screen *v3d_screen_create(int fd); + +void +vc5_fence_init(struct vc5_screen *screen); + +#endif /* VC5_SCREEN_H */ diff --git a/src/gallium/drivers/v3d/v3d_simulator.c b/src/gallium/drivers/v3d/v3d_simulator.c new file mode 100644 index 00000000000..86e4ed3be3d --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator.c @@ -0,0 +1,660 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file v3d_simulator.c + * + * Implements VC5 simulation on top of a non-VC5 GEM fd. + * + * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on + * top of the simpenrose software simulator. Generally, VC5 driver BOs have a + * GEM-side copy of their contents and a simulator-side memory area that the + * GEM contents get copied into during simulation. Once simulation is done, + * the simulator's data is copied back out to the GEM BOs, so that rendering + * appears on the screen as if actual hardware rendering had been done. + * + * One of the limitations of this code is that we shouldn't really need a + * GEM-side BO for non-window-system BOs. However, we do need unique BO + * handles for each of our GEM bos so that this file can look up its state + * from the handle passed in at submit ioctl time (also, a couple of places + * outside of this file still call ioctls directly on the fd). + * + * Another limitation is that BO import doesn't work unless the underlying + * window system's BO size matches what VC5 is going to use, which of course + * doesn't work out in practice. This means that for now, only DRI3 (VC5 + * makes the winsys BOs) is supported, not DRI2 (window system makes the winsys + * BOs). 
+ */ + +#ifdef USE_V3D_SIMULATOR + +#include +#include "util/hash_table.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "util/u_memory.h" +#include "util/u_mm.h" +#include "v3d_simulator_wrapper.h" + +#include "v3d_screen.h" +#include "v3d_context.h" + +/** Global (across GEM fds) state for the simulator */ +static struct vc5_simulator_state { + mtx_t mutex; + + struct v3d_hw *v3d; + int ver; + + /* Base virtual address of the heap. */ + void *mem; + /* Base hardware address of the heap. */ + uint32_t mem_base; + /* Size of the heap. */ + size_t mem_size; + + struct mem_block *heap; + struct mem_block *overflow; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *fd_map; + + int refcount; +} sim_state = { + .mutex = _MTX_INITIALIZER_NP, +}; + +/** Per-GEM-fd state for the simulator. */ +struct vc5_simulator_file { + int fd; + + /** Mapping from GEM handle to struct vc5_simulator_bo * */ + struct hash_table *bo_map; + + struct mem_block *gmp; + void *gmp_vaddr; +}; + +/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */ +struct vc5_simulator_bo { + struct vc5_simulator_file *file; + + /** Area for this BO within sim_state->mem */ + struct mem_block *block; + uint32_t size; + void *vaddr; + + void *winsys_map; + uint32_t winsys_stride; + + int handle; +}; + +static void * +int_to_key(int key) +{ + return (void *)(uintptr_t)key; +} + +static struct vc5_simulator_file * +vc5_get_simulator_file_for_fd(int fd) +{ + struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map, + int_to_key(fd + 1)); + return entry ? entry->data : NULL; +} + +/* A marker placed just after each BO, then checked after rendering to make + * sure it's still there. + */ +#define BO_SENTINEL 0xfedcba98 + +/* 128kb */ +#define GMP_ALIGN2 17 + +/** + * Sets the range of GPU virtual address space to have the given GMP + * permissions (bit 0 = read, bit 1 = write, write-only forbidden). 
+ */ +static void +set_gmp_flags(struct vc5_simulator_file *file, + uint32_t offset, uint32_t size, uint32_t flag) +{ + assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0); + int gmp_offset = offset >> GMP_ALIGN2; + int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2; + uint32_t *gmp = file->gmp_vaddr; + + assert(flag <= 0x3); + + /* Each 32-bit GMP word holds sixteen 2-bit page entries. */ + for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) { + int32_t bitshift = (i % 16) * 2; + /* Unsigned mask: bitshift reaches 30, and 0x3 << 30 would + * overflow a signed int. + */ + gmp[i / 16] &= ~(0x3u << bitshift); + gmp[i / 16] |= flag << bitshift; + } +} + +/** + * Allocates space in simulator memory and returns a tracking struct for it + * that also contains the drm_gem_cma_object struct. + */ +static struct vc5_simulator_bo * +vc5_create_simulator_bo(int fd, int handle, unsigned size) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = rzalloc(file, + struct vc5_simulator_bo); + size = align(size, 4096); + + sim_bo->file = file; + sim_bo->handle = handle; + + mtx_lock(&sim_state.mutex); + sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0); + mtx_unlock(&sim_state.mutex); + assert(sim_bo->block); + + set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); + + sim_bo->size = size; + sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + + /* A handle of 0 is used for vc5_gem.c internal allocations that + * don't need to go in the lookup table. 
+ */ + if (handle != 0) { + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(file->bo_map, int_to_key(handle), + sim_bo); + mtx_unlock(&sim_state.mutex); + } + + return sim_bo; +} + +static void +vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo) +{ + struct vc5_simulator_file *sim_file = sim_bo->file; + + if (sim_bo->winsys_map) + munmap(sim_bo->winsys_map, sim_bo->size); + + set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + + mtx_lock(&sim_state.mutex); + u_mmFreeMem(sim_bo->block); + if (sim_bo->handle) { + struct hash_entry *entry = + _mesa_hash_table_search(sim_file->bo_map, + int_to_key(sim_bo->handle)); + _mesa_hash_table_remove(sim_file->bo_map, entry); + } + mtx_unlock(&sim_state.mutex); + ralloc_free(sim_bo); +} + +static struct vc5_simulator_bo * +vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle) +{ + mtx_lock(&sim_state.mutex); + struct hash_entry *entry = + _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle)); + mtx_unlock(&sim_state.mutex); + + return entry ? 
entry->data : NULL; +} + +static int +vc5_simulator_pin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + vc5_bo_map(bo); + memcpy(sim_bo->vaddr, bo->map, bo->size); + } + + return 0; +} + +static int +vc5_simulator_unpin_bos(int fd, struct vc5_job *job) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct set_entry *entry; + + set_foreach(job->bos, entry) { + struct vc5_bo *bo = (struct vc5_bo *)entry->key; + struct vc5_simulator_bo *sim_bo = + vc5_get_simulator_bo(file, bo->handle); + + if (*(uint32_t *)(sim_bo->vaddr + + sim_bo->size) != BO_SENTINEL) { + fprintf(stderr, "Buffer overflow in %s\n", bo->name); + } + + vc5_bo_map(bo); + memcpy(bo->map, sim_bo->vaddr, bo->size); + } + + return 0; +} + +#if 0 +static void +vc5_dump_to_file(struct vc5_exec_info *exec) +{ + static int dumpno = 0; + struct drm_vc5_get_hang_state *state; + struct drm_vc5_get_hang_state_bo *bo_state; + unsigned int dump_version = 0; + + if (!(vc5_debug & VC5_DEBUG_DUMP)) + return; + + state = calloc(1, sizeof(*state)); + + int unref_count = 0; + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + unref_count++; + } + + /* Add one more for the overflow area that isn't wrapped in a BO. 
*/ + state->bo_count = exec->bo_count + unref_count + 1; + bo_state = calloc(state->bo_count, sizeof(*bo_state)); + + char *filename = NULL; + asprintf(&filename, "vc5-dri-%d.dump", dumpno++); + FILE *f = fopen(filename, "w+"); + if (!f) { + fprintf(stderr, "Couldn't open %s: %s", filename, + strerror(errno)); + return; + } + + fwrite(&dump_version, sizeof(dump_version), 1, f); + + state->ct0ca = exec->ct0ca; + state->ct0ea = exec->ct0ea; + state->ct1ca = exec->ct1ca; + state->ct1ea = exec->ct1ea; + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + fwrite(state, sizeof(*state), 1, f); + + int i; + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + bo_state[i].handle = i; /* Not used by the parser. */ + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + bo_state[i].handle = 0; + bo_state[i].paddr = cma_bo->paddr; + bo_state[i].size = cma_bo->base.size; + i++; + } + + /* Add the static overflow memory area. 
*/ + bo_state[i].handle = exec->bo_count; + bo_state[i].paddr = sim_state.overflow->ofs; + bo_state[i].size = sim_state.overflow->size; + i++; + + fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + + for (int i = 0; i < exec->bo_count; i++) { + struct drm_gem_cma_object *cma_bo = exec->bo[i]; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, + unref_head) { + struct drm_gem_cma_object *cma_bo = &bo->base; + fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); + } + + void *overflow = calloc(1, sim_state.overflow->size); + fwrite(overflow, 1, sim_state.overflow->size, f); + free(overflow); + + free(state); + free(bo_state); + fclose(f); +} +#endif + +int +vc5_simulator_flush(struct vc5_context *vc5, + struct drm_v3d_submit_cl *submit, struct vc5_job *job) +{ + struct vc5_screen *screen = vc5->screen; + int fd = screen->fd; + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]); + struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL; + struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL; + uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; + uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; + uint32_t row_len = MIN2(sim_stride, winsys_stride); + int ret; + + if (ctex && csim_bo->winsys_map) { +#if 0 + fprintf(stderr, "%dx%d %d %d %d\n", + ctex->base.b.width0, ctex->base.b.height0, + winsys_stride, + sim_stride, + ctex->bo->size); +#endif + + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(ctex->bo->map + y * sim_stride, + csim_bo->winsys_map + y * winsys_stride, + row_len); + } + } + + ret = vc5_simulator_pin_bos(fd, job); + if (ret) + return ret; + + //vc5_dump_to_file(&exec); + + if (sim_state.ver >= 41) + v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + else + v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + + ret = vc5_simulator_unpin_bos(fd, job); + if (ret) + return ret; + + if (ctex && csim_bo->winsys_map) { + for (int y = 0; y < ctex->base.height0; y++) { + memcpy(csim_bo->winsys_map + y * winsys_stride, + ctex->bo->map + y * sim_stride, + row_len); + } + } + + return 0; +} + +/** + * Map the underlying GEM object from the real hardware GEM handle. + */ +static void * +vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo) +{ + int ret; + void *map; + + struct drm_mode_map_dumb map_dumb = { + .handle = sim_bo->handle, + }; + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); + if (ret != 0) { + fprintf(stderr, "map ioctl failure\n"); + abort(); + } + + map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map_dumb.offset); + if (map == MAP_FAILED) { + fprintf(stderr, + "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + sim_bo->handle, (long long)map_dumb.offset, + (int)sim_bo->size); + abort(); + } + + return map; +} + +/** + * Do fixups after a BO has been opened from a handle. + * + * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE + * time, but we're still using drmPrimeFDToHandle() so we have this helper to + * be called afterward instead. 
+ */ +void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, + int handle, uint32_t size) +{ + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, handle, size); + + sim_bo->winsys_stride = winsys_stride; + sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo); +} + +/** + * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. + * + * Making a VC5 BO is just a matter of making a corresponding BO on the host. + * + * Returns the host ioctl's result; on failure args is left untouched. + */ +static int +vc5_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) +{ + int ret; + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + /* Don't allocate simulator memory for a host BO that was never + * created. + */ + if (ret != 0) + return ret; + assert(create.size >= args->size); + + args->handle = create.handle; + + struct vc5_simulator_bo *sim_bo = + vc5_create_simulator_bo(fd, create.handle, args->size); + + args->offset = sim_bo->block->ofs; + + return ret; +} + +/** + * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. + * + * We just pass this straight through to dumb mmap. + */ +static int +vc5_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args) +{ + int ret; + struct drm_mode_map_dumb map = { + .handle = args->handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + args->offset = map.offset; + + return ret; +} + +static int +vc5_simulator_get_bo_offset_ioctl(int fd, struct drm_v3d_get_bo_offset *args) +{ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + args->offset = sim_bo->block->ofs; + + return 0; +} + +static int +vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) +{ + /* Free the simulator's internal tracking. */ + struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); + struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, + args->handle); + + vc5_free_simulator_bo(sim_bo); + + /* Pass the call on down. 
*/ + return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args); +} + +static int +vc5_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args) +{ + if (sim_state.ver >= 41) + return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); + else + return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); +} + +int +vc5_simulator_ioctl(int fd, unsigned long request, void *args) +{ + switch (request) { + case DRM_IOCTL_V3D_CREATE_BO: + return vc5_simulator_create_bo_ioctl(fd, args); + case DRM_IOCTL_V3D_MMAP_BO: + return vc5_simulator_mmap_bo_ioctl(fd, args); + case DRM_IOCTL_V3D_GET_BO_OFFSET: + return vc5_simulator_get_bo_offset_ioctl(fd, args); + + case DRM_IOCTL_V3D_WAIT_BO: + /* We do all of the vc5 rendering synchronously, so we just + * return immediately on the wait ioctls. This ignores any + * native rendering to the host BO, so it does mean we race on + * front buffer rendering. + */ + return 0; + + case DRM_IOCTL_V3D_GET_PARAM: + return vc5_simulator_get_param_ioctl(fd, args); + + case DRM_IOCTL_GEM_CLOSE: + return vc5_simulator_gem_close_ioctl(fd, args); + + case DRM_IOCTL_GEM_OPEN: + case DRM_IOCTL_GEM_FLINK: + return drmIoctl(fd, request, args); + default: + fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request); + abort(); + } +} + +static void +vc5_simulator_init_global(const struct v3d_device_info *devinfo) +{ + mtx_lock(&sim_state.mutex); + if (sim_state.refcount++) { + mtx_unlock(&sim_state.mutex); + return; + } + + sim_state.v3d = v3d_hw_auto_new(NULL); + v3d_hw_alloc_mem(sim_state.v3d, 1024 * 1024 * 1024); + sim_state.mem_base = + v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size, + &sim_state.mem); + + /* Allocate from anywhere from 4096 up. We don't allocate at 0, + * because for OQs and some other addresses in the HW, 0 means + * disabled. + */ + sim_state.heap = u_mmInit(4096, sim_state.mem_size - 4096); + + /* Make a block of 0xd0 at address 0 to make sure we don't screw up + * and land there. 
+ */ + struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0); + memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096); + + sim_state.ver = v3d_hw_get_version(sim_state.v3d); + + sim_state.fd_map = + _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + if (sim_state.ver >= 41) + v3d41_simulator_init_regs(sim_state.v3d); + else + v3d33_simulator_init_regs(sim_state.v3d); + + /* Only drop the lock once fd_map and the registers are set up, so + * a concurrent caller that sees refcount != 0 finds them ready. + */ + mtx_unlock(&sim_state.mutex); +} + +/** + * Sets up the per-screen simulator state: the BO lookup table, the fd_map + * entry for the screen's fd, and the GMP table in simulator memory. + */ +void +vc5_simulator_init(struct vc5_screen *screen) +{ + vc5_simulator_init_global(&screen->devinfo); + + screen->sim_file = rzalloc(screen, struct vc5_simulator_file); + struct vc5_simulator_file *sim_file = screen->sim_file; + + screen->sim_file->bo_map = + _mesa_hash_table_create(screen->sim_file, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + mtx_lock(&sim_state.mutex); + _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1), + screen->sim_file); + /* The heap is shared across fds, so allocate from it under the + * mutex like the BO allocation path does. + * NOTE(review): 8096 looks like a typo for 8192 -- confirm. + */ + sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0); + mtx_unlock(&sim_state.mutex); + + sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs - + sim_state.mem_base); +} + +void +vc5_simulator_destroy(struct vc5_screen *screen) +{ + mtx_lock(&sim_state.mutex); + if (!--sim_state.refcount) { + _mesa_hash_table_destroy(sim_state.fd_map, NULL); + u_mmDestroy(sim_state.heap); + /* No memsetting the struct, because it contains the mutex. 
*/ + sim_state.mem = NULL; + } + mtx_unlock(&sim_state.mutex); +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp new file mode 100644 index 00000000000..7b04ded2b53 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp @@ -0,0 +1,88 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file + * + * Wraps bits of the V3D simulator interface in a C interface for the + * v3d_simulator.c code to use. 
+ */ + +#ifdef USE_V3D_SIMULATOR + +#include "v3d_simulator_wrapper.h" + +#define V3D_TECH_VERSION 3 +#define V3D_REVISION 3 +#define V3D_SUB_REV 0 +#define V3D_HIDDEN_REV 0 +#define V3D_COMPAT_REV 0 +#include "v3d_hw_auto.h" + +extern "C" { + +struct v3d_hw *v3d_hw_auto_new(void *in_params) +{ + return v3d_hw_auto_make_unique().release(); +} + + +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p) +{ + return hw->get_mem(size, p); +} + +bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size) +{ + return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS; +} + +bool v3d_hw_has_gca(struct v3d_hw *hw) +{ + return hw->has_gca(); +} + +uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg) +{ + return hw->read_reg(reg); +} + +void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val) +{ + hw->write_reg(reg, val); +} + +void v3d_hw_tick(struct v3d_hw *hw) +{ + return hw->tick(); +} + +int v3d_hw_get_version(struct v3d_hw *hw) +{ + const V3D_HUB_IDENT_T *ident = hw->get_hub_ident(); + + return ident->tech_version * 10 + ident->revision; +} + +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3d_simulator_wrapper.h b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h new file mode 100644 index 00000000000..8b5dca15ed9 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_simulator_wrapper.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +struct v3d_hw; + +#ifdef __cplusplus +extern "C" { +#endif + +struct v3d_hw *v3d_hw_auto_new(void *params); +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p); +bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size); +bool v3d_hw_has_gca(struct v3d_hw *hw); +uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg); +void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); +void v3d_hw_tick(struct v3d_hw *hw); +int v3d_hw_get_version(struct v3d_hw *hw); + +#ifdef __cplusplus +} +#endif diff --git a/src/gallium/drivers/v3d/v3d_tiling.c b/src/gallium/drivers/v3d/v3d_tiling.c new file mode 100644 index 00000000000..f9c4a342184 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_tiling.c @@ -0,0 +1,389 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file v3d_tiling.c + * + * Handles information about the VC5 tiling formats, and loading and storing + * from them. + */ + +#include <stdint.h> +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_tiling.h" + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +uint32_t +vc5_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. + */ +static inline uint32_t +vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + + assert(x < utile_w && y < utile_h); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction.
+ */ +static inline uint32_t +vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 128 : 0) + + + vc5_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. 
+ */ +static inline uint32_t +vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, + bool do_xor) +{ + uint32_t utile_w = vc5_utile_width(cpp); + uint32_t utile_h = vc5_utile_height(cpp); + uint32_t mb_width = utile_w * 2; + uint32_t mb_height = utile_h * 2; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + if (do_xor && (mb_x / 4) & 1) + mb_y ^= 0x10; + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < utile_h; + bool left = mb_pixel_x < utile_w; + + /* Docs have this in pixels, we do bytes here. */ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t utile_x = mb_pixel_x & (utile_w - 1); + uint32_t utile_y = mb_pixel_y & (utile_h - 1); + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + vc5_get_utile_pixel_offset(cpp, + utile_x, + utile_y)); + + return mb_pixel_address; +} + +static inline uint32_t +vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true); +} + +static inline uint32_t +vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false); +} + +static inline void +vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; 
y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +static inline void +vc5_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, box, + get_pixel_offset, + is_load); + break; + case 16: + vc5_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum vc5_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool is_load) +{ + switch (tiling_format) { + case VC5_TILING_UIF_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_uif_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UIF_NO_XOR: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + 
vc5_get_uif_no_xor_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_2_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_2_column_pixel_offset, + is_load); + break; + case VC5_TILING_UBLINEAR_1_COLUMN: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_ublinear_1_column_pixel_offset, + is_load); + break; + case VC5_TILING_LINEARTILE: + vc5_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + vc5_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. 
+ */ +void +vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + vc5_move_tiled_image(dst, dst_stride, + src, src_stride, + tiling_format, + cpp, + image_h, + box, + false); +} diff --git a/src/gallium/drivers/v3d/v3d_tiling.h b/src/gallium/drivers/v3d/v3d_tiling.h new file mode 100644 index 00000000000..d3cf48c4527 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_tiling.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef VC5_TILING_H +#define VC5_TILING_H + +uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST; +uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST; +bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; +void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); +void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); +void vc5_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); +void vc5_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum vc5_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); + +#endif /* VC5_TILING_H */ diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c new file mode 100644 index 00000000000..c7a39b50a74 --- /dev/null +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -0,0 +1,489 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_pack_color.h" +#include "util/format_srgb.h" + +#include "v3d_context.h" +#include "compiler/v3d_compiler.h" +#include "broadcom/cle/v3d_packet_v33_pack.h" + +#if 0 + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +static void +write_texture_border_color(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_state *sampler = texstate->samplers[unit]; + struct pipe_sampler_view *texture = texstate->textures[unit]; + struct vc5_resource *rsc = vc5_resource(texture->texture); + union util_color uc; + + const struct util_format_description *tex_format_desc = + util_format_description(texture->format); + + float border_color[4]; + for (int i = 0; i < 4; i++) + border_color[i] = sampler->border_color.f[i]; + if (util_format_is_srgb(texture->format)) { + for (int i = 0; i < 3; i++) + border_color[i] = + util_format_linear_to_srgb_float(border_color[i]); + } + + /* Turn the border color into the layout of channels that it would + * have when stored as texture contents. + */ + float storage_color[4]; + util_format_unswizzle_4f(storage_color, + border_color, + tex_format_desc->swizzle); + + /* Now, pack so that when the vc5_format-sampled texture contents are + * replaced with our border color, the vc5_get_format_swizzle() + * swizzling will get the right channels. 
+ */ + if (util_format_is_depth_or_stencil(texture->format)) { + uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, + sampler->border_color.f[0]) << 8; + } else { + switch (rsc->vc5_format) { + default: + case VC5_TEXTURE_TYPE_RGBA8888: + util_pack_color(storage_color, + PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGBA4444: + util_pack_color(storage_color, + PIPE_FORMAT_A8B8G8R8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_RGB565: + util_pack_color(storage_color, + PIPE_FORMAT_B8G8R8A8_UNORM, &uc); + break; + case VC5_TEXTURE_TYPE_ALPHA: + uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; + break; + case VC5_TEXTURE_TYPE_LUMALPHA: + uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | + (float_to_ubyte(storage_color[0]) << 0)); + break; + } + } + + cl_aligned_u32(uniforms, uc.ui[0]); +} +#endif + +static uint32_t +get_texrect_scale(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + uint32_t dim; + + if (contents == QUNIFORM_TEXRECT_SCALE_X) + dim = texture->texture->width0; + else + dim = texture->texture->height0; + + return fui(1.0f / dim); +} + +static uint32_t +get_texture_size(struct vc5_texture_stateobj *texstate, + enum quniform_contents contents, + uint32_t data) +{ + struct pipe_sampler_view *texture = texstate->textures[data]; + + switch (contents) { + case QUNIFORM_TEXTURE_WIDTH: + return u_minify(texture->texture->width0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_HEIGHT: + return u_minify(texture->texture->height0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_DEPTH: + return u_minify(texture->texture->depth0, + texture->u.tex.first_level); + case QUNIFORM_TEXTURE_ARRAY_SIZE: + return texture->texture->array_size; + case QUNIFORM_TEXTURE_LEVELS: + return (texture->u.tex.last_level - + texture->u.tex.first_level) + 1; + default: + unreachable("Bad texture size field"); + } +} + +static struct vc5_bo * 
+vc5_upload_ubo(struct vc5_context *vc5, + struct vc5_compiled_shader *shader, + const uint32_t *gallium_uniforms) +{ + if (!shader->prog_data.base->ubo_size) + return NULL; + + struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen, + shader->prog_data.base->ubo_size, + "ubo"); + void *data = vc5_bo_map(ubo); + for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) { + memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset, + ((const void *)gallium_uniforms + + shader->prog_data.base->ubo_ranges[i].src_offset), + shader->prog_data.base->ubo_ranges[i].size); + } + + return ubo; +} + +/** + * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter. + * + * Some bits of this field are dependent on the type of sample being done by + * the shader, while other bits are dependent on the sampler state. We OR the + * two together here. + */ +static void +write_texture_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t unit, + uint32_t shader_data) +{ + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_u32(uniforms, shader_data | sampler->p0); +} + +/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */ +static void +write_texture_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p1 from the bottom. 
+ */ + uint32_t unit = data >> 5; + uint32_t p1 = data & 0x1f; + + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { + .texture_state_record_base_address = texstate->texture_state[unit], + }; + + uint32_t packed; + V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, + (uint8_t *)&packed, + &unpacked); + + cl_aligned_u32(uniforms, p1 | packed | sview->p1); +} + +/** Writes the V3D 4.x TMU configuration parameter 0. */ +static void +write_tmu_p0(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p0 from the bottom. + */ + uint32_t unit = data >> 24; + uint32_t p0 = data & 0x00ffffff; + + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct vc5_resource *rsc = vc5_resource(psview->texture); + + cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0); + vc5_job_add_bo(job, rsc->bo); +} + +/** Writes the V3D 4.x TMU configuration parameter 1. */ +static void +write_tmu_p1(struct vc5_job *job, + struct vc5_cl_out **uniforms, + struct vc5_texture_stateobj *texstate, + uint32_t data) +{ + /* Extract the texture unit from the top bits, and the compiler's + * packed p1 from the bottom. 
+ */ + uint32_t unit = data >> 24; + uint32_t p0 = data & 0x00ffffff; + + struct pipe_sampler_state *psampler = texstate->samplers[unit]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0); +} + +struct vc5_cl_reloc +vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader, + struct vc5_constbuf_stateobj *cb, + struct vc5_texture_stateobj *texstate) +{ + struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; + struct vc5_job *job = vc5->job; + const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; + struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms); + + /* We always need to return some space for uniforms, because the HW + * will be prefetching, even if we don't read any in the program. + */ + vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4); + + struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect); + vc5_bo_reference(uniform_stream.bo); + + struct vc5_cl_out *uniforms = + cl_start(&job->indirect); + + for (int i = 0; i < uinfo->count; i++) { + + switch (uinfo->contents[i]) { + case QUNIFORM_CONSTANT: + cl_aligned_u32(&uniforms, uinfo->data[i]); + break; + case QUNIFORM_UNIFORM: + cl_aligned_u32(&uniforms, + gallium_uniforms[uinfo->data[i]]); + break; + case QUNIFORM_VIEWPORT_X_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f); + break; + case QUNIFORM_VIEWPORT_Y_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f); + break; + + case QUNIFORM_VIEWPORT_Z_OFFSET: + cl_aligned_f(&uniforms, vc5->viewport.translate[2]); + break; + case QUNIFORM_VIEWPORT_Z_SCALE: + cl_aligned_f(&uniforms, vc5->viewport.scale[2]); + break; + + case QUNIFORM_USER_CLIP_PLANE: + cl_aligned_f(&uniforms, + vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + break; + + case QUNIFORM_TMU_CONFIG_P0: + write_tmu_p0(job, &uniforms, texstate, + uinfo->data[i]); + break; + + case QUNIFORM_TMU_CONFIG_P1: 
+ write_tmu_p1(job, &uniforms, texstate, + uinfo->data[i]); + break; + + case QUNIFORM_TEXTURE_CONFIG_P1: + write_texture_p1(job, &uniforms, texstate, + uinfo->data[i]); + break; + +#if 0 + case QUNIFORM_TEXTURE_FIRST_LEVEL: + write_texture_first_level(job, &uniforms, texstate, + uinfo->data[i]); + break; +#endif + + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + cl_aligned_u32(&uniforms, + get_texrect_scale(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + cl_aligned_u32(&uniforms, + get_texture_size(texstate, + uinfo->contents[i], + uinfo->data[i])); + break; + + case QUNIFORM_STENCIL: + cl_aligned_u32(&uniforms, + vc5->zsa->stencil_uniforms[uinfo->data[i]] | + (uinfo->data[i] <= 1 ? + (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) : + 0)); + break; + + case QUNIFORM_ALPHA_REF: + cl_aligned_f(&uniforms, + vc5->zsa->base.alpha.ref_value); + break; + + case QUNIFORM_SAMPLE_MASK: + cl_aligned_u32(&uniforms, vc5->sample_mask); + break; + + case QUNIFORM_UBO_ADDR: + if (uinfo->data[i] == 0) { + cl_aligned_reloc(&job->indirect, &uniforms, + ubo, 0); + } else { + int ubo_index = uinfo->data[i]; + struct vc5_resource *rsc = + vc5_resource(cb->cb[ubo_index].buffer); + + cl_aligned_reloc(&job->indirect, &uniforms, + rsc->bo, + cb->cb[ubo_index].buffer_offset); + } + break; + + case QUNIFORM_TEXTURE_FIRST_LEVEL: + cl_aligned_f(&uniforms, + texstate->textures[uinfo->data[i]]->u.tex.first_level); + break; + + case QUNIFORM_TEXTURE_BORDER_COLOR: + /* XXX */ + break; + + case QUNIFORM_SPILL_OFFSET: + cl_aligned_reloc(&job->indirect, &uniforms, + vc5->prog.spill_bo, 0); + break; + + case QUNIFORM_SPILL_SIZE_PER_THREAD: + cl_aligned_u32(&uniforms, + vc5->prog.spill_size_per_thread); + break; + + default: + assert(quniform_contents_is_texture_p0(uinfo->contents[i])); + + 
write_texture_p0(job, &uniforms, texstate, + uinfo->contents[i] - + QUNIFORM_TEXTURE_CONFIG_P0_0, + uinfo->data[i]); + break; + + } +#if 0 + uint32_t written_val = *((uint32_t *)uniforms - 1); + fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", + shader, i, __gen_address_offset(&uniform_stream) + i * 4, + written_val, uif(written_val)); +#endif + } + + cl_end(&job->indirect, uniforms); + + vc5_bo_unreference(&ubo); + + return uniform_stream; +} + +void +vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader) +{ + uint32_t dirty = 0; + + for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) { + switch (shader->prog_data.base->uniforms.contents[i]) { + case QUNIFORM_CONSTANT: + break; + case QUNIFORM_UNIFORM: + case QUNIFORM_UBO_ADDR: + dirty |= VC5_DIRTY_CONSTBUF; + break; + + case QUNIFORM_VIEWPORT_X_SCALE: + case QUNIFORM_VIEWPORT_Y_SCALE: + case QUNIFORM_VIEWPORT_Z_OFFSET: + case QUNIFORM_VIEWPORT_Z_SCALE: + dirty |= VC5_DIRTY_VIEWPORT; + break; + + case QUNIFORM_USER_CLIP_PLANE: + dirty |= VC5_DIRTY_CLIP; + break; + + case QUNIFORM_TMU_CONFIG_P0: + case QUNIFORM_TMU_CONFIG_P1: + case QUNIFORM_TEXTURE_CONFIG_P1: + case QUNIFORM_TEXTURE_BORDER_COLOR: + case QUNIFORM_TEXTURE_FIRST_LEVEL: + case QUNIFORM_TEXRECT_SCALE_X: + case QUNIFORM_TEXRECT_SCALE_Y: + case QUNIFORM_TEXTURE_WIDTH: + case QUNIFORM_TEXTURE_HEIGHT: + case QUNIFORM_TEXTURE_DEPTH: + case QUNIFORM_TEXTURE_ARRAY_SIZE: + case QUNIFORM_TEXTURE_LEVELS: + case QUNIFORM_SPILL_OFFSET: + case QUNIFORM_SPILL_SIZE_PER_THREAD: + /* We could flag this on just the stage we're + * compiling for, but it's not passed in. 
+ */ + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + + case QUNIFORM_STENCIL: + case QUNIFORM_ALPHA_REF: + dirty |= VC5_DIRTY_ZSA; + break; + + case QUNIFORM_SAMPLE_MASK: + dirty |= VC5_DIRTY_SAMPLE_MASK; + break; + + default: + assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); + dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; + break; + } + } + + shader->uniform_dirty_bits = dirty; +} diff --git a/src/gallium/drivers/v3d/v3dx_context.h b/src/gallium/drivers/v3d/v3dx_context.h new file mode 100644 index 00000000000..faeda2c0fbb --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_context.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This file generates the per-v3d-version function prototypes. It must only + * be included from v3d_context.h. 
+ */ + +struct v3d_hw; +struct vc5_format; + +void v3dX(emit_state)(struct pipe_context *pctx); +void v3dX(emit_rcl)(struct vc5_job *job); +void v3dX(draw_init)(struct pipe_context *pctx); +void v3dX(state_init)(struct pipe_context *pctx); + +void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job); + +void v3dX(simulator_init_regs)(struct v3d_hw *v3d); +int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_get_param *args); +void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs); +const struct vc5_format *v3dX(get_format_desc)(enum pipe_format f); +void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp); diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c new file mode 100644 index 00000000000..03ee6b2b196 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -0,0 +1,714 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_blitter.h" +#include "util/u_prim.h" +#include "util/u_format.h" +#include "util/u_pack_color.h" +#include "util/u_prim_restart.h" +#include "util/u_upload_mgr.h" +#include "indices/u_primconvert.h" + +#include "v3d_context.h" +#include "v3d_resource.h" +#include "v3d_cl.h" +#include "broadcom/compiler/v3d_compiler.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +/** + * Does the initial binning command list setup for drawing to a given FBO. + */ +static void +vc5_start_draw(struct vc5_context *vc5) +{ + struct vc5_job *job = vc5->job; + + if (job->needs_flush) + return; + + /* Get space to emit our BCL state, using a branch to jump to a new BO + * if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + job->submit.bcl_start = job->bcl.bo->offset; + vc5_job_add_bo(job, job->bcl.bo); + + job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc"); + uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64; + job->tile_state = vc5_bo_alloc(vc5->screen, + job->draw_tiles_y * + job->draw_tiles_x * + tsda_per_tile_size, + "TSDA"); + +#if V3D_VERSION < 40 + /* "Binning mode lists start with a Tile Binning Mode Configuration + * item (120)" + * + * Part1 signals the end of binning config setup.
+ */ + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) { + config.tile_allocation_memory_address = + cl_address(job->tile_alloc, 0); + config.tile_allocation_memory_size = job->tile_alloc->size; + } +#endif + + cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) { +#if V3D_VERSION >= 40 + config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1; + config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1; + config.number_of_render_targets_minus_1 = + MAX2(vc5->framebuffer.nr_cbufs, 1) - 1; +#else /* V3D_VERSION < 40 */ + config.tile_state_data_array_base_address = + cl_address(job->tile_state, 0); + + config.width_in_tiles = job->draw_tiles_x; + config.height_in_tiles = job->draw_tiles_y; + /* Must be >= 1 */ + config.number_of_render_targets = + MAX2(vc5->framebuffer.nr_cbufs, 1); +#endif /* V3D_VERSION < 40 */ + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + /* There's definitely nothing in the VCD cache we want. */ + cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); + + /* Disable any leftover OQ state from another job. */ + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); + + /* "Binning mode lists must have a Start Tile Binning item (6) after + * any prefix state data before the binning list proper starts." 
+ */ + cl_emit(&job->bcl, START_TILE_BINNING, bin); + + job->needs_flush = true; + job->draw_width = vc5->framebuffer.width; + job->draw_height = vc5->framebuffer.height; +} + +static void +vc5_predraw_check_textures(struct pipe_context *pctx, + struct vc5_texture_stateobj *stage_tex) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + for (int i = 0; i < stage_tex->num_textures; i++) { + struct pipe_sampler_view *view = stage_tex->textures[i]; + if (!view) + continue; + + vc5_flush_jobs_writing_resource(vc5, view->texture); + } +} + +static void +vc5_emit_gl_shader_state(struct vc5_context *vc5, + const struct pipe_draw_info *info) +{ + struct vc5_job *job = vc5->job; + /* VC5_DIRTY_VTXSTATE */ + struct vc5_vertex_stateobj *vtx = vc5->vtx; + /* VC5_DIRTY_VTXBUF */ + struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf; + + /* Upload the uniforms to the indirect CL first */ + struct vc5_cl_reloc fs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.fs, + &vc5->constbuf[PIPE_SHADER_FRAGMENT], + &vc5->fragtex); + struct vc5_cl_reloc vs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.vs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + struct vc5_cl_reloc cs_uniforms = + vc5_write_uniforms(vc5, vc5->prog.cs, + &vc5->constbuf[PIPE_SHADER_VERTEX], + &vc5->verttex); + + /* See GFXH-930 workaround below */ + uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); + uint32_t shader_rec_offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(GL_SHADER_STATE_RECORD) + + num_elements_to_emit * + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), + 32); + + cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { + shader.enable_clipping = true; + /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ + shader.point_size_in_shaded_vertex_data = + (info->mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + + /* Must be set if the shader modifies Z, discards, or modifies + * the sample mask. 
For any of these cases, the fragment + * shader needs to write the Z value (even just discards). + */ + shader.fragment_shader_does_z_writes = + (vc5->prog.fs->prog_data.fs->writes_z || + vc5->prog.fs->prog_data.fs->discard); + + shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = + vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w; + + shader.number_of_varyings_in_fragment_shader = + vc5->prog.fs->prog_data.base->num_inputs; + + shader.propagate_nans = true; + + shader.coordinate_shader_code_address = + cl_address(vc5->prog.cs->bo, 0); + shader.vertex_shader_code_address = + cl_address(vc5->prog.vs->bo, 0); + shader.fragment_shader_code_address = + cl_address(vc5->prog.fs->bo, 0); + + /* XXX: Use combined input/output size flag in the common + * case. + */ + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; + shader.coordinate_shader_input_vpm_segment_size = + MAX2(vc5->prog.cs->prog_data.vs->vpm_input_size, 1); + shader.vertex_shader_input_vpm_segment_size = + MAX2(vc5->prog.vs->prog_data.vs->vpm_input_size, 1); + + shader.coordinate_shader_output_vpm_segment_size = + vc5->prog.cs->prog_data.vs->vpm_output_size; + shader.vertex_shader_output_vpm_segment_size = + vc5->prog.vs->prog_data.vs->vpm_output_size; + + shader.coordinate_shader_uniforms_address = cs_uniforms; + shader.vertex_shader_uniforms_address = vs_uniforms; + shader.fragment_shader_uniforms_address = fs_uniforms; + +#if V3D_VERSION >= 41 + shader.coordinate_shader_4_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 4; + shader.vertex_shader_4_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 4; + shader.fragment_shader_4_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 4; + + shader.coordinate_shader_start_in_final_thread_section = + vc5->prog.cs->prog_data.vs->base.single_seg; + shader.vertex_shader_start_in_final_thread_section = + 
vc5->prog.vs->prog_data.vs->base.single_seg; + shader.fragment_shader_start_in_final_thread_section = + vc5->prog.fs->prog_data.fs->base.single_seg; +#else + shader.coordinate_shader_4_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 4; + shader.coordinate_shader_2_way_threadable = + vc5->prog.cs->prog_data.vs->base.threads == 2; + shader.vertex_shader_4_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 4; + shader.vertex_shader_2_way_threadable = + vc5->prog.vs->prog_data.vs->base.threads == 2; + shader.fragment_shader_4_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 4; + shader.fragment_shader_2_way_threadable = + vc5->prog.fs->prog_data.fs->base.threads == 2; +#endif + + shader.vertex_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_vid; + shader.instance_id_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->uses_iid; + shader.vertex_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_vid; + shader.instance_id_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->uses_iid; + + shader.address_of_default_attribute_values = + cl_address(vtx->default_attribute_values, 0); + } + + for (int i = 0; i < vtx->num_elements; i++) { + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &vertexbuf->vb[elem->vertex_buffer_index]; + struct vc5_resource *rsc = vc5_resource(vb->buffer.resource); + + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + cl_emit_with_prepacked(&job->indirect, + GL_SHADER_STATE_ATTRIBUTE_RECORD, + &vtx->attrs[i * size], attr) { + attr.stride = vb->stride; + attr.address = cl_address(rsc->bo, + vb->buffer_offset + + elem->src_offset); + attr.number_of_values_read_by_coordinate_shader = + vc5->prog.cs->prog_data.vs->vattr_sizes[i]; + attr.number_of_values_read_by_vertex_shader = + vc5->prog.vs->prog_data.vs->vattr_sizes[i]; +#if V3D_VERSION >= 41 + attr.maximum_index = 0xffffff; +#endif + } + } + + if 
(vtx->num_elements == 0) { + /* GFXH-930: At least one attribute must be enabled and read + * by CS and VS. If we have no attributes being consumed by + * the shader, set up a dummy to be loaded into the VPM. + */ + cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* Valid address of data whose value will be unused. */ + attr.address = cl_address(job->indirect.bo, 0); + + attr.type = ATTRIBUTE_FLOAT; + attr.stride = 0; + attr.vec_size = 1; + + attr.number_of_values_read_by_coordinate_shader = 1; + attr.number_of_values_read_by_vertex_shader = 1; + } + } + + cl_emit(&job->bcl, GL_SHADER_STATE, state) { + state.address = cl_address(job->indirect.bo, shader_rec_offset); + state.number_of_attribute_arrays = num_elements_to_emit; + } + + vc5_bo_unreference(&cs_uniforms.bo); + vc5_bo_unreference(&vs_uniforms.bo); + vc5_bo_unreference(&fs_uniforms.bo); + + job->shader_rec_count++; +} + +/** + * Computes the various transform feedback statistics, since they can't be + * recorded by CL packets. + */ +static void +vc5_tf_statistics_record(struct vc5_context *vc5, + const struct pipe_draw_info *info, + bool prim_tf) +{ + if (!vc5->active_queries) + return; + + uint32_t prims = u_prims_for_vertices(info->mode, info->count); + vc5->prims_generated += prims; + + if (prim_tf) { + /* XXX: Only count if we didn't overflow. */ + vc5->tf_prims_generated += prims; + } +} + +static void +vc5_update_job_ez(struct vc5_context *vc5, struct vc5_job *job) +{ + switch (vc5->zsa->ez_state) { + case VC5_EZ_UNDECIDED: + /* If the Z/S state didn't pick a direction but didn't + * disable, then go along with the current EZ state. This + * allows EZ optimization for Z func == EQUAL or NEVER. + */ + break; + + case VC5_EZ_LT_LE: + case VC5_EZ_GT_GE: + /* If the Z/S state picked a direction, then it needs to match + * the current direction if we've decided on one. 
+ */ + if (job->ez_state == VC5_EZ_UNDECIDED) + job->ez_state = vc5->zsa->ez_state; + else if (job->ez_state != vc5->zsa->ez_state) + job->ez_state = VC5_EZ_DISABLED; + break; + + case VC5_EZ_DISABLED: + /* If the current Z/S state disables EZ because of a bad Z + * func or stencil operation, then we can't do any more EZ in + * this frame. + */ + job->ez_state = VC5_EZ_DISABLED; + break; + } + + /* If the FS affects the Z of the pixels, then it may update against + * the chosen EZ direction (though we could use + * ARB_conservative_depth's hints to avoid this) + */ + if (vc5->prog.fs->prog_data.fs->writes_z) { + job->ez_state = VC5_EZ_DISABLED; + } + + if (job->first_ez_state == VC5_EZ_UNDECIDED) + job->first_ez_state = job->ez_state; +} + +static void +vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + if (!info->count_from_stream_output && !info->indirect && + !info->primitive_restart && + !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) + return; + + /* Fall back for weird desktop GL primitive restart values. */ + if (info->primitive_restart && + info->index_size) { + uint32_t mask = ~0; + + switch (info->index_size) { + case 2: + mask = 0xffff; + break; + case 1: + mask = 0xff; + break; + } + + if (info->restart_index != mask) { + util_draw_vbo_without_prim_restart(pctx, info); + return; + } + } + + if (info->mode >= PIPE_PRIM_QUADS) { + util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base); + util_primconvert_draw_vbo(vc5->primconvert, info); + perf_debug("Fallback conversion for %d %s vertices\n", + info->count, u_prim_name(info->mode)); + return; + } + + /* Before setting up the draw, flush anything writing to the textures + * that we read from. 
+ */ + vc5_predraw_check_textures(pctx, &vc5->verttex); + vc5_predraw_check_textures(pctx, &vc5->fragtex); + + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* Get space to emit our draw call into the BCL, using a branch to + * jump to a new BO if necessary. + */ + vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); + + if (vc5->prim_mode != info->mode) { + vc5->prim_mode = info->mode; + vc5->dirty |= VC5_DIRTY_PRIM_MODE; + } + + vc5_start_draw(vc5); + vc5_update_compiled_shaders(vc5, info->mode); + vc5_update_job_ez(vc5, job); + +#if V3D_VERSION >= 41 + v3d41_emit_state(pctx); +#else + v3d33_emit_state(pctx); +#endif + + if (vc5->dirty & (VC5_DIRTY_VTXBUF | + VC5_DIRTY_VTXSTATE | + VC5_DIRTY_PRIM_MODE | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_COMPILED_CS | + VC5_DIRTY_COMPILED_VS | + VC5_DIRTY_COMPILED_FS | + vc5->prog.cs->uniform_dirty_bits | + vc5->prog.vs->uniform_dirty_bits | + vc5->prog.fs->uniform_dirty_bits)) { + vc5_emit_gl_shader_state(vc5, info); + } + + vc5->dirty = 0; + + /* The Base Vertex/Base Instance packet sets those values to nonzero + * for the next draw call only. + */ + if (info->index_bias || info->start_instance) { + cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { + base.base_instance = info->start_instance; + base.base_vertex = info->index_bias; + } + } + + uint32_t prim_tf_enable = 0; +#if V3D_VERSION < 40 + /* V3D 3.x: The HW only processes transform feedback on primitives + * with the flag set. + */ + if (vc5->streamout.num_targets) + prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); +#endif + + vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets); + + /* Note that the primitive type fields match with OpenGL/gallium + * definitions, up to but not including QUADS. 
+ */ + if (info->index_size) { + uint32_t index_size = info->index_size; + uint32_t offset = info->start * index_size; + struct pipe_resource *prsc; + if (info->has_user_indices) { + prsc = NULL; + u_upload_data(vc5->uploader, 0, + info->count * info->index_size, 4, + info->index.user, + &offset, &prsc); + } else { + prsc = info->index.resource; + } + struct vc5_resource *rsc = vc5_resource(prsc); + +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { + ib.address = cl_address(rsc->bo, 0); + ib.size = rsc->bo->size; + } +#endif + + if (info->instance_count > 1) { + cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else { + cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; + prim.length = info->count; +#if V3D_VERSION >= 40 + prim.index_offset = offset; +#else /* V3D_VERSION < 40 */ + prim.maximum_index = (1u << 31) - 1; /* XXX */ + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + } + } + + /* draw_calls_queued is bumped once after the indexed/non-indexed + * branches; counting here as well would double-count indexed draws. + */ + if (info->has_user_indices) + pipe_resource_reference(&prsc, NULL); + } else { + if (info->instance_count > 1) { + cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.index_of_first_vertex = info->start; + prim.number_of_instances = info->instance_count; + prim.instance_length = info->count; + } + } else
{ + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.length = info->count; + prim.index_of_first_vertex = info->start; + } + } + } + job->draw_calls_queued++; + + if (vc5->zsa && job->zsbuf && + (vc5->zsa->base.depth.enabled || + vc5->zsa->base.stencil[0].enabled)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + vc5_job_add_bo(job, rsc->bo); + + if (vc5->zsa->base.depth.enabled) { + job->resolve |= PIPE_CLEAR_DEPTH; + rsc->initialized_buffers = PIPE_CLEAR_DEPTH; + } + + if (vc5->zsa->base.stencil[0].enabled) { + job->resolve |= PIPE_CLEAR_STENCIL; + rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; + } + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + + if (job->resolve & bit || !job->cbufs[i]) + continue; + struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture); + + job->resolve |= bit; + vc5_job_add_bo(job, rsc->bo); + } + + if (job->referenced_size > 768 * 1024 * 1024) { + perf_debug("Flushing job with %dkb to try to free up memory\n", + job->referenced_size / 1024); + vc5_flush(pctx); + } + + if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) + vc5_flush(pctx); +} + +static void +vc5_clear(struct pipe_context *pctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5_get_job_for_fbo(vc5); + + /* We can't flag new buffers for clearing once we've queued draws. We + * could avoid this by using the 3d engine to clear. 
+ */ + if (job->draw_calls_queued) { + perf_debug("Flushing rendering to process new clear.\n"); + vc5_job_submit(vc5, job); + job = vc5_get_job_for_fbo(vc5); + } + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(buffers & bit)) + continue; + + struct pipe_surface *psurf = vc5->framebuffer.cbufs[i]; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + union util_color uc; + uint32_t internal_size = 4 << surf->internal_bpp; + + /* Build the R/B-swapped color in a per-iteration copy and leave + * the caller's color untouched, so that each render target + * starts from the original clear color. + */ + const union pipe_color_union *rt_color = color; + union pipe_color_union swapped_color; + if (vc5->swap_color_rb & (1 << i)) { + swapped_color.f[0] = color->f[2]; + swapped_color.f[1] = color->f[1]; + swapped_color.f[2] = color->f[0]; + swapped_color.f[3] = color->f[3]; + rt_color = &swapped_color; + } + + switch (surf->internal_type) { + case V3D_INTERNAL_TYPE_8: + util_pack_color(rt_color->f, PIPE_FORMAT_R8G8B8A8_UNORM, + &uc); + memcpy(job->clear_color[i], uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_8I: + case V3D_INTERNAL_TYPE_8UI: + job->clear_color[i][0] = ((rt_color->ui[0] & 0xff) | + (rt_color->ui[1] & 0xff) << 8 | + (rt_color->ui[2] & 0xff) << 16 | + (rt_color->ui[3] & 0xff) << 24); + break; + case V3D_INTERNAL_TYPE_16F: + util_pack_color(rt_color->f, PIPE_FORMAT_R16G16B16A16_FLOAT, + &uc); + memcpy(job->clear_color[i], uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_16I: + case V3D_INTERNAL_TYPE_16UI: + job->clear_color[i][0] = ((rt_color->ui[0] & 0xffff) | + rt_color->ui[1] << 16); + job->clear_color[i][1] = ((rt_color->ui[2] & 0xffff) | + rt_color->ui[3] << 16); + break; + case V3D_INTERNAL_TYPE_32F: + case V3D_INTERNAL_TYPE_32I: + case V3D_INTERNAL_TYPE_32UI: + memcpy(job->clear_color[i], rt_color->ui, internal_size); + break; + } + + rsc->initialized_buffers |= bit; + } + + unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; + if (zsclear) { + struct vc5_resource *rsc = + vc5_resource(vc5->framebuffer.zsbuf->texture); + + if (zsclear & PIPE_CLEAR_DEPTH) + job->clear_z =
depth; + if (zsclear & PIPE_CLEAR_STENCIL) + job->clear_s = stencil; + + rsc->initialized_buffers |= zsclear; + } + + job->draw_min_x = 0; + job->draw_min_y = 0; + job->draw_max_x = vc5->framebuffer.width; + job->draw_max_y = vc5->framebuffer.height; + job->cleared |= buffers; + job->resolve |= buffers; + + vc5_start_draw(vc5); +} + +static void +vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear RT\n"); +} + +static void +vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h, + bool render_condition_enabled) +{ + fprintf(stderr, "unimpl: clear DS\n"); +} + +void +v3dX(draw_init)(struct pipe_context *pctx) +{ + pctx->draw_vbo = vc5_draw_vbo; + pctx->clear = vc5_clear; + pctx->clear_render_target = vc5_clear_render_target; + pctx->clear_depth_stencil = vc5_clear_depth_stencil; +} diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c new file mode 100644 index 00000000000..e2aba356de4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -0,0 +1,722 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_half.h" +#include "v3d_context.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +static uint8_t +vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one) +{ + /* We may get a bad blendfactor when blending is disabled. */ + if (factor == 0) + return V3D_BLEND_FACTOR_ZERO; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return V3D_BLEND_FACTOR_ZERO; + case PIPE_BLENDFACTOR_ONE: + return V3D_BLEND_FACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V3D_BLEND_FACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V3D_BLEND_FACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return V3D_BLEND_FACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V3D_BLEND_FACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V3D_BLEND_FACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V3D_BLEND_FACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ONE : + V3D_BLEND_FACTOR_DST_ALPHA); + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return (dst_alpha_one ? 
+ V3D_BLEND_FACTOR_ZERO : + V3D_BLEND_FACTOR_INV_DST_ALPHA); + case PIPE_BLENDFACTOR_CONST_COLOR: + return V3D_BLEND_FACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V3D_BLEND_FACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V3D_BLEND_FACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V3D_BLEND_FACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: + unreachable("Bad blend factor"); + } +} + +static inline uint16_t +swizzled_border_color(const struct v3d_device_info *devinfo, + struct pipe_sampler_state *sampler, + struct vc5_sampler_view *sview, + int chan) +{ + const struct util_format_description *desc = + util_format_description(sview->base.format); + uint8_t swiz = chan; + + /* If we're doing swizzling in the sampler, then only rearrange the + * border color for the mismatch between the VC5 texture format and + * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by + * the sampler's swizzle. + * + * For swizzling in the shader, we don't do any pre-swizzling of the + * border color. 
+ */ + if (vc5_get_tex_return_size(devinfo, sview->base.format, + sampler->compare_mode) != 32) + swiz = desc->swizzle[swiz]; + + switch (swiz) { + case PIPE_SWIZZLE_0: + return util_float_to_half(0.0); + case PIPE_SWIZZLE_1: + return util_float_to_half(1.0); + default: + return util_float_to_half(sampler->border_color.f[swiz]); + } +} + +#if V3D_VERSION < 40 +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +static void +emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, + int i) +{ + struct vc5_job *job = vc5->job; + struct pipe_sampler_state *psampler = stage_tex->samplers[i]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + struct pipe_sampler_view *psview = stage_tex->textures[i]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct pipe_resource *prsc = psview->texture; + struct vc5_resource *rsc = vc5_resource(prsc); + const struct v3d_device_info *devinfo = &vc5->screen->devinfo; + + stage_tex->texture_state[i].offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(TEXTURE_SHADER_STATE), + 32); + vc5_bo_set_reference(&stage_tex->texture_state[i].bo, + job->indirect.bo); + + uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format, + psampler->compare_mode); + + struct V3D33_TEXTURE_SHADER_STATE unpacked = { + /* XXX */ + .border_color_red = swizzled_border_color(devinfo, psampler, + sview, 0), + .border_color_green = swizzled_border_color(devinfo, psampler, + sview, 1), + .border_color_blue = swizzled_border_color(devinfo, psampler, + sview, 2), + .border_color_alpha = swizzled_border_color(devinfo, psampler, + sview, 3), + + /* In the normal texturing path, the LOD gets clamped 
between + * min/max, and the base_level field (set in the sampler view + * from first_level) only decides where the min/mag switch + * happens, so we need to use the LOD clamps to keep us + * between min and max. + * + * For txf, the LOD clamp is still used, despite GL not + * wanting that. We will need to have a separate + * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to + * support txf properly. + */ + .min_level_of_detail = MIN2(psview->u.tex.first_level + + MAX2(psampler->min_lod, 0), + psview->u.tex.last_level), + .max_level_of_detail = MIN2(psview->u.tex.first_level + + psampler->max_lod, + psview->u.tex.last_level), + + .texture_base_pointer = cl_address(rsc->bo, + rsc->slices[0].offset), + + .output_32_bit = return_size == 32, + }; + + /* Set up the sampler swizzle if we're doing 16-bit sampling. For + * 32-bit, we leave swizzling up to the shader compiler. + * + * Note: Contrary to the docs, the swizzle still applies even if the + * return size is 32. It's just that you probably want to swizzle in + * the shader, because you need the Y/Z/W channels to be defined. 
+ */ + if (return_size == 32) { + unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } else { + unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]); + unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]); + unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]); + unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]); + } + + int min_img_filter = psampler->min_img_filter; + int min_mip_filter = psampler->min_mip_filter; + int mag_img_filter = psampler->mag_img_filter; + + /* With a 32-bit return size, override all three filters + * (minification, mip and magnification) to nearest. + */ + if (return_size == 32) { + min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + min_img_filter = PIPE_TEX_FILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; + switch (min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + unpacked.filter += min_nearest ? 2 : 0; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + unpacked.filter += min_nearest ? 4 : 8; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + unpacked.filter += min_nearest ? 4 : 8; + unpacked.filter += 2; + break; + } + + if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) + unpacked.filter++; + + if (psampler->max_anisotropy > 8) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; + else if (psampler->max_anisotropy > 4) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; + else if (psampler->max_anisotropy > 2) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; + else if (psampler->max_anisotropy) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; + + uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; + cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); + + for (int i = 0; i < ARRAY_SIZE(packed); i++) + packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; + + /* TMU indirect structs need to be 32b aligned.
*/ + vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); + cl_emit_prepacked(&job->indirect, &packed); +} + +static void +emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) { + if (stage_tex->textures[i]) + emit_one_texture(vc5, stage_tex, i); + } +} +#endif /* V3D_VERSION < 40 */ + +static uint32_t +translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt) +{ + if (vc5->swap_color_rb & (1 << rt)) { + colormask = ((colormask & (2 | 8)) | + ((colormask & 1) << 2) | + ((colormask & 4) >> 2)); + } + + return (~colormask) & 0xf; +} + +static void +emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job, + struct pipe_blend_state *blend, int rt) +{ + cl_emit(&job->bcl, BLEND_CONFIG, config) { + struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; + +#if V3D_VERSION >= 40 + config.render_target_mask = 1 << rt; +#else + assert(rt == 0); +#endif + + config.colour_blend_mode = rtblend->rgb_func; + config.colour_blend_dst_factor = + vc5_factor(rtblend->rgb_dst_factor, + vc5->blend_dst_alpha_one); + config.colour_blend_src_factor = + vc5_factor(rtblend->rgb_src_factor, + vc5->blend_dst_alpha_one); + + config.alpha_blend_mode = rtblend->alpha_func; + config.alpha_blend_dst_factor = + vc5_factor(rtblend->alpha_dst_factor, + vc5->blend_dst_alpha_one); + config.alpha_blend_src_factor = + vc5_factor(rtblend->alpha_src_factor, + vc5->blend_dst_alpha_one); + } +} + +void +v3dX(emit_state)(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5->job; + bool rasterizer_discard = vc5->rasterizer->base.rasterizer_discard; + + if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | + VC5_DIRTY_RASTERIZER)) { + float *vpscale = vc5->viewport.scale; + float *vptranslate = vc5->viewport.translate; + float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; + float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; + float vp_miny = 
-fabsf(vpscale[1]) + vptranslate[1]; + float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; + + /* Clip to the scissor if it's enabled, but still clip to the + * drawable regardless since that controls where the binner + * tries to put things. + * + * Additionally, always clip the rendering to the viewport, + * since the hardware does guardband clipping, meaning + * primitives would rasterize outside of the view volume. + */ + uint32_t minx, miny, maxx, maxy; + if (!vc5->rasterizer->base.scissor) { + minx = MAX2(vp_minx, 0); + miny = MAX2(vp_miny, 0); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); + } else { + minx = MAX2(vp_minx, vc5->scissor.minx); + miny = MAX2(vp_miny, vc5->scissor.miny); + maxx = MIN2(vp_maxx, vc5->scissor.maxx); + maxy = MIN2(vp_maxy, vc5->scissor.maxy); + } + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_width_in_pixels = maxx - minx; + clip.clip_window_height_in_pixels = maxy - miny; + +#if V3D_VERSION < 41 + /* The HW won't entirely clip out when scissor w/h is + * 0. Just treat it the same as rasterizer discard. 
+ */ + if (clip.clip_window_width_in_pixels == 0 || + clip.clip_window_height_in_pixels == 0) { + rasterizer_discard = true; + clip.clip_window_width_in_pixels = 1; + clip.clip_window_height_in_pixels = 1; + } +#endif + } + + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); + } + + if (vc5->dirty & (VC5_DIRTY_RASTERIZER | + VC5_DIRTY_ZSA | + VC5_DIRTY_BLEND | + VC5_DIRTY_COMPILED_FS)) { + cl_emit(&job->bcl, CONFIGURATION_BITS, config) { + config.enable_forward_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_FRONT); + config.enable_reverse_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_BACK); + /* This seems backwards, but it's what gets the + * clipflat test to pass. + */ + config.clockwise_primitives = + vc5->rasterizer->base.front_ccw; + + config.enable_depth_offset = + vc5->rasterizer->base.offset_tri; + + config.rasterizer_oversample_mode = + vc5->rasterizer->base.multisample; + + config.direct3d_provoking_vertex = + vc5->rasterizer->base.flatshade_first; + + config.blend_enable = vc5->blend->rt[0].blend_enable; + + /* Note: EZ state may update based on the compiled FS, + * along with ZSA + */ + config.early_z_updates_enable = + (job->ez_state != VC5_EZ_DISABLED); + if (vc5->zsa->base.depth.enabled) { + config.z_updates_enable = + vc5->zsa->base.depth.writemask; + config.early_z_enable = + config.early_z_updates_enable; + config.depth_test_function = + vc5->zsa->base.depth.func; + } else { + config.depth_test_function = PIPE_FUNC_ALWAYS; + } + + config.stencil_enable = + vc5->zsa->base.stencil[0].enabled; + } + + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER && + vc5->rasterizer->base.offset_tri) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_factor = + vc5->rasterizer->offset_factor; + 
depth.depth_offset_units = + vc5->rasterizer->offset_units; + } + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER) { + cl_emit(&job->bcl, POINT_SIZE, point_size) { + point_size.point_size = vc5->rasterizer->point_size; + } + + cl_emit(&job->bcl, LINE_WIDTH, line_width) { + line_width.line_width = vc5->rasterizer->base.line_width; + } + } + + if (vc5->dirty & VC5_DIRTY_VIEWPORT) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = + vc5->viewport.scale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = + vc5->viewport.scale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc5->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc5->viewport.scale[2]; + } + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + clip.minimum_zw = (vc5->viewport.translate[2] - + vc5->viewport.scale[2]); + clip.maximum_zw = (vc5->viewport.translate[2] + + vc5->viewport.scale[2]); + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = + vc5->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + vc5->viewport.translate[1]; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) { + struct pipe_blend_state *blend = vc5->blend; + + if (blend->independent_blend_enable) { + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) + emit_rt_blend(vc5, job, blend, i); + } else { + emit_rt_blend(vc5, job, blend, 0); + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND) { + struct pipe_blend_state *blend = vc5->blend; + + cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { + if (blend->independent_blend_enable) { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[1].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, 
blend->rt[2].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[3].colormask, 3); + } else { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 3); + } + } + } + + /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant + * color. + */ + if (vc5->dirty & VC5_DIRTY_BLEND_COLOR || + (V3D_VERSION < 41 && (vc5->dirty & VC5_DIRTY_BLEND))) { + cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { + colour.red_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[2] : + vc5->blend_color.hf[0]); + colour.green_f16 = vc5->blend_color.hf[1]; + colour.blue_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[0] : + vc5->blend_color.hf[2]); + colour.alpha_f16 = vc5->blend_color.hf[3]; + } + } + + if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { + struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; + struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; + + if (front->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_front, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[0]; + } + } + + if (back->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_back, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[1]; + } + } + } + +#if V3D_VERSION < 40 + /* Pre-4.x, we have texture state that depends on both the sampler and + * the view, so we merge them together at draw time. 
+ */ + if (vc5->dirty & VC5_DIRTY_FRAGTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_VERTTEX) + emit_textures(vc5, &vc5->verttex); +#endif + + if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i]) + continue; + + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.flat_shade_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->flat_shade_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); + } + } + +#if V3D_VERSION >= 40 + if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->centroid_flags[i]) + continue; + + cl_emit(&job->bcl, CENTROID_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + ((i == 0) ? 
+ V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.centroid_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->centroid_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); + } + } +#endif + + /* Set up the transform feedback data specs (which VPM entries to + * output to which buffers). + */ + if (vc5->dirty & (VC5_DIRTY_STREAMOUT | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_PRIM_MODE)) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + + if (so->num_targets) { + bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + uint16_t *tf_specs = (psiz_per_vertex ? + vc5->prog.bind_vs->tf_specs_psiz : + vc5->prog.bind_vs->tf_specs); + +#if V3D_VERSION >= 40 + job->tf_enabled = (vc5->prog.bind_vs->num_tf_specs != 0 && + vc5->active_queries); + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + tfe.enable = job->tf_enabled; + }; +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { + tfe.number_of_32_bit_output_buffer_address_following = + so->num_targets; + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + }; +#endif /* V3D_VERSION < 40 */ + for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { + cl_emit_prepacked(&job->bcl, &tf_specs[i]); + } + } else if (job->tf_enabled) { +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.enable = false; + }; + job->tf_enabled = false; +#endif /* V3D_VERSION >= 40 */ + } + } + + /* Set up the transform feedback buffers.
*/ + if (vc5->dirty & VC5_DIRTY_STREAMOUT) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + for (int i = 0; i < so->num_targets; i++) { + const struct pipe_stream_output_target *target = + so->targets[i]; + struct vc5_resource *rsc = target ? + vc5_resource(target->buffer) : NULL; + +#if V3D_VERSION >= 40 + if (!target) + continue; + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) { + output.buffer_address = + cl_address(rsc->bo, + target->buffer_offset); + output.buffer_size_in_32_bit_words = + target->buffer_size >> 2; + output.buffer_number = i; + } +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { + if (target) { + output.address = + cl_address(rsc->bo, + target->buffer_offset); + } + }; +#endif /* V3D_VERSION < 40 */ + if (target) { + vc5_job_add_write_resource(vc5->job, + target->buffer); + } + /* XXX: buffer_size? */ + } + } + + if (vc5->dirty & VC5_DIRTY_OQ) { + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { + job->oq_enabled = vc5->active_queries && vc5->current_oq; + if (job->oq_enabled) { + counter.address = cl_address(vc5->current_oq, 0); + } + } + } +} diff --git a/src/gallium/drivers/v3d/v3dx_format_table.c b/src/gallium/drivers/v3d/v3dx_format_table.c new file mode 100644 index 00000000000..458488119c7 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_format_table.c @@ -0,0 +1,318 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" + +#include "v3d_context.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/common/v3d_macros.h" +#include "v3d_format_table.h" + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \ + [PIPE_FORMAT_##pipe] = { \ + true, \ + V3D_OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + return_channels, \ + } + +#define SWIZ_X001 SWIZ(X, 0, 0, 1) +#define SWIZ_XY01 SWIZ(X, Y, 0, 1) +#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) +#define SWIZ_XYZW SWIZ(X, Y, Z, W) +#define SWIZ_YZWX SWIZ(Y, Z, W, X) +#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) +#define SWIZ_ZYXW SWIZ(Z, Y, X, W) +#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) +#define SWIZ_XXXY SWIZ(X, X, X, Y) +#define SWIZ_XXX1 SWIZ(X, X, X, 1) +#define SWIZ_XXXX SWIZ(X, X, X, X) +#define SWIZ_000X SWIZ(0, 0, 0, X) + +static const struct vc5_format format_table[] = { + FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_UNORM, RGBA8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0), + FORMAT(B8G8R8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYX1, 16, 0), + FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_UNORM, RGBA8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), + 
FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), + FORMAT(R10G10B10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, 0), + FORMAT(R10G10B10A2_UINT, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, 0), + + FORMAT(A4B4G4R4_UNORM, ABGR4444, RGBA4, SWIZ_XYZW, 16, 0), + + FORMAT(A1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, 0), + FORMAT(X1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZ1, 16, 0), + FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0), + + FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0), + FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0), + FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0), + FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0), + + FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1), + FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, 1), + FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0), + FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1), + + FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2), + FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2), + FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0), + FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2), + + FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4), + FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0), + FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4), + + /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. 
*/ + FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1), + FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1), + FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1), + FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1), + FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1), + FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1), + FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2), + FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2), + + FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0), + FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0), + FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0), + FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0), + + FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, 0), + FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, 0), + FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, 0), + FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, 0), + FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, 0), + + FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, 0), + FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, 0), + FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, 0), + FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, 0), + FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, 0), + FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, 0), + + FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, 1), + FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, 1), + FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, 2), + FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, 2), + FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, 4), + FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, 4), + + FORMAT(A8_SINT, R8I, R8I, SWIZ_000X, 16, 0), + FORMAT(A8_UINT, R8UI, R8UI, SWIZ_000X, 16, 0), + FORMAT(A16_SINT, R16I, R16I, SWIZ_000X, 16, 0), + FORMAT(A16_UINT, R16UI, R16UI, SWIZ_000X, 16, 0), + FORMAT(A32_SINT, R32I, R32I, SWIZ_000X, 32, 1), + FORMAT(A32_UINT, R32UI, R32UI, SWIZ_000X, 32, 1), + + FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 
16, 0), + FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0), + +#if V3D_VERSION >= 40 + FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, S8, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z32_FLOAT, D32F, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z16_UNORM, D16, DEPTH_COMP16,SWIZ_XXXX, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. */ + FORMAT(Z32_FLOAT_S8X24_UINT, D32F, R32F, SWIZ_XXXX, 32, 1), +#else + FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), + FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1), + + /* Pretend we support this, but it'll be separate Z32F depth and S8. */ + FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), +#endif + + FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), + FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_SRGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), + FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0), + FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0), + FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0), + + FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0), + FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZ1, 16, 0), + FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZ1, 16, 0), +}; + +const struct vc5_format * +v3dX(get_format_desc)(enum pipe_format f) +{ + if (f < ARRAY_SIZE(format_table) && format_table[f].present) + return &format_table[f]; + else + return NULL; +} + +void 
+v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + switch (format) { + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: +#if V3D_VERSION < 41 + case V3D_OUTPUT_IMAGE_FORMAT_RGBX8: +#endif + case V3D_OUTPUT_IMAGE_FORMAT_RGB8: + case V3D_OUTPUT_IMAGE_FORMAT_RG8: + case V3D_OUTPUT_IMAGE_FORMAT_R8: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444: + case V3D_OUTPUT_IMAGE_FORMAT_BGR565: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555: + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I: + case V3D_OUTPUT_IMAGE_FORMAT_RG8I: + case V3D_OUTPUT_IMAGE_FORMAT_R8I: + *type = V3D_INTERNAL_TYPE_8I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI: + case V3D_OUTPUT_IMAGE_FORMAT_RG8UI: + case V3D_OUTPUT_IMAGE_FORMAT_R8UI: + *type = V3D_INTERNAL_TYPE_8UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: + case V3D_OUTPUT_IMAGE_FORMAT_SRGB: + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: + case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: +#if V3D_VERSION < 41 + case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8: +#endif + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: + /* Note that sRGB RTs are stored in the tile buffer at 16F, + * and the conversion to sRGB happens at tilebuffer + * load/store. + */ + *type = V3D_INTERNAL_TYPE_16F; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG16F: + case V3D_OUTPUT_IMAGE_FORMAT_R16F: + *type = V3D_INTERNAL_TYPE_16F; + /* Use 64bpp to make sure the TLB doesn't throw away the alpha + * channel before alpha test happens. 
+ */ + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG16I: + case V3D_OUTPUT_IMAGE_FORMAT_R16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI: + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG16UI: + case V3D_OUTPUT_IMAGE_FORMAT_R16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_128; + break; + case V3D_OUTPUT_IMAGE_FORMAT_RG32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_64; + break; + case V3D_OUTPUT_IMAGE_FORMAT_R32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_32; + break; + + default: + /* Provide some default values, as we'll be called at RB + * creation time, even if an RB with this format isn't + * supported. 
+ */ + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + } +} diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c new file mode 100644 index 00000000000..5e1a345b170 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_job.c @@ -0,0 +1,76 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file v3dx_job.c + * + * V3D version-specific functions for submitting VC5 render jobs to the + * kernel. 
+ */ + +#include "v3d_context.h" +#include "broadcom/cle/v3dx_pack.h" + +void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job) +{ + vc5_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(OCCLUSION_QUERY_COUNTER) + +#if V3D_VERSION >= 41 + cl_packet_length(TRANSFORM_FEEDBACK_SPECS) + +#endif + cl_packet_length(INCREMENT_SEMAPHORE) + + cl_packet_length(FLUSH_ALL_STATE)); + + if (job->oq_enabled) { + /* Disable the OQ at the end of the CL, so that the + * draw calls at the start of the CL don't inherit the + * OQ counter. + */ + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); + } + + /* Disable TF at the end of the CL, so that the next job to be + * run doesn't start out trying to write TF primitives. On + * V3D 3.x, it's only the TF primitive mode that triggers TF + * writes. + */ +#if V3D_VERSION >= 41 + if (job->tf_enabled) { + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.enable = false; + }; + } +#endif /* V3D_VERSION >= 41 */ + + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act + * until the FLUSH completes. + */ + cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); + + /* The FLUSH_ALL emits any unwritten state changes in each + * tile. We can use this to reset any state that needs to be + * present at the start of the next tile, as we do with + * OCCLUSION_QUERY_COUNTER above. 
+ */ + cl_emit(&job->bcl, FLUSH_ALL_STATE, flush); +} diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c new file mode 100644 index 00000000000..3801d03ecee --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_rcl.c @@ -0,0 +1,782 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "v3d_context.h" +#include "v3d_tiling.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ + PIPE_CLEAR_COLOR1 | \ + PIPE_CLEAR_COLOR2 | \ + PIPE_CLEAR_COLOR3) /* OR of the four PIPE_CLEAR_COLORn bits */ + +#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) /* 0-based index of the COLOR0 bit */ + +/* The HW queues up the load until the tile coordinates show up, but can only + * track one at a time.
If we need to do more than one load, then we need to + * flush out the previous load by emitting the tile coordinates and doing a + * dummy store. + */ +static void +flush_last_load(struct vc5_cl *cl) +{ + if (V3D_VERSION >= 40) + return; + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +} + +static void +load_general(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, + uint32_t pipe_bit, uint32_t *loads_pending) +{ + struct vc5_surface *surf = vc5_surface(psurf); + bool separate_stencil = surf->separate_stencil && buffer == STENCIL; + if (separate_stencil) { + psurf = surf->separate_stencil; + surf = vc5_surface(psurf); + } + + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = buffer; + load.address = cl_address(rsc->bo, surf->offset); + +#if V3D_VERSION >= 40 + load.memory_format = surf->tiling; + if (separate_stencil) + load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; + else + load.input_image_format = surf->format; + + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + load.height_in_ub_or_stride = + surf->padded_height_of_output_image_in_uif_blocks; + } else if (surf->tiling == VC5_TILING_RASTER) { + struct vc5_resource_slice *slice = + &rsc->slices[psurf->u.tex.level]; + load.height_in_ub_or_stride = slice->stride; + } + + if (psurf->texture->nr_samples > 1) + load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + +#else /* V3D_VERSION < 40 */ + /* Can't do raw ZSTENCIL loads -- need to load/store them to + * separate buffers for Z and stencil. 
+ */ + assert(buffer != ZSTENCIL); + load.raw_mode = true; + load.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; +#endif /* V3D_VERSION < 40 */ + } + + *loads_pending &= ~pipe_bit; + if (*loads_pending) + flush_last_load(cl); +} + +static void +store_general(struct vc5_job *job, + struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, + int pipe_bit, uint32_t *stores_pending, bool general_color_clear) +{ + struct vc5_surface *surf = vc5_surface(psurf); + bool separate_stencil = surf->separate_stencil && buffer == STENCIL; + if (separate_stencil) { + psurf = surf->separate_stencil; + surf = vc5_surface(psurf); + } + + *stores_pending &= ~pipe_bit; + bool last_store = !(*stores_pending); + + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + rsc->writes++; + + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = buffer; + store.address = cl_address(rsc->bo, surf->offset); + +#if V3D_VERSION >= 40 + store.clear_buffer_being_stored = + ((job->cleared & pipe_bit) && + (general_color_clear || + !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS))); + + if (separate_stencil) + store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; + else + store.output_image_format = surf->format; + + store.memory_format = surf->tiling; + + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + store.height_in_ub_or_stride = + surf->padded_height_of_output_image_in_uif_blocks; + } else if (surf->tiling == VC5_TILING_RASTER) { + struct vc5_resource_slice *slice = + &rsc->slices[psurf->u.tex.level]; + store.height_in_ub_or_stride = slice->stride; + } + + if (psurf->texture->nr_samples > 1) + store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + +#else /* V3D_VERSION < 40 */ + /* Can't do raw ZSTENCIL stores -- need to load/store them to + * separate buffers for Z and stencil. 
+ */ + assert(buffer != ZSTENCIL); + store.raw_mode = true; + if (!last_store) { + store.disable_colour_buffers_clear_on_write = true; + store.disable_z_buffer_clear_on_write = true; + store.disable_stencil_buffer_clear_on_write = true; + } else { + store.disable_colour_buffers_clear_on_write = + !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && + general_color_clear && + (job->cleared & pipe_bit))); + store.disable_z_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_DEPTH); + store.disable_stencil_buffer_clear_on_write = + !(job->cleared & PIPE_CLEAR_STENCIL); + } + store.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; +#endif /* V3D_VERSION < 40 */ + } + + /* There must be a TILE_COORDINATES_IMPLICIT between each store. */ + if (V3D_VERSION < 40 && !last_store) { + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } +} + +static int +zs_buffer_from_pipe_bits(int pipe_clear_bits) +{ + switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { + case PIPE_CLEAR_DEPTHSTENCIL: + return ZSTENCIL; + case PIPE_CLEAR_DEPTH: + return Z; + case PIPE_CLEAR_STENCIL: + return STENCIL; + default: + return NONE; + } +} + +static void +vc5_rcl_emit_loads(struct vc5_job *job, struct vc5_cl *cl) +{ + uint32_t loads_pending = job->resolve & ~job->cleared; + + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(loads_pending & bit)) + continue; + + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf || (V3D_VERSION < 40 && + psurf->texture->nr_samples <= 1)) { + continue; + } + + load_general(cl, psurf, RENDER_TARGET_0 + i, + bit, &loads_pending); + } + + if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && + (V3D_VERSION >= 40 || + (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + + if (rsc->separate_stencil && + (loads_pending & PIPE_CLEAR_STENCIL)) { + load_general(cl, job->zsbuf, + STENCIL, + PIPE_CLEAR_STENCIL, + 
&loads_pending); + } + + if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { + load_general(cl, job->zsbuf, + zs_buffer_from_pipe_bits(loads_pending), + loads_pending & PIPE_CLEAR_DEPTHSTENCIL, + &loads_pending); + } + } + +#if V3D_VERSION < 40 + /* The initial reload will be queued until we get the + * tile coordinates. + */ + if (loads_pending) { + cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) { + load.disable_colour_buffer_load = + (~loads_pending & + PIPE_CLEAR_COLOR_BUFFERS) >> + PIPE_FIRST_COLOR_BUFFER_BIT; + load.enable_z_load = + loads_pending & PIPE_CLEAR_DEPTH; + load.enable_stencil_load = + loads_pending & PIPE_CLEAR_STENCIL; + } + } +#else /* V3D_VERSION >= 40 */ + assert(!loads_pending); + cl_emit(cl, END_OF_LOADS, end); +#endif +} + +static void +vc5_rcl_emit_stores(struct vc5_job *job, struct vc5_cl *cl) +{ + MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS; + MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH; + MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL; + + /* For clearing color in a TLB general on V3D 3.3: + * + * - NONE buffer store clears all TLB color buffers. + * - color buffer store clears just the TLB color buffer being stored. + * - Z/S buffers store may not clear the TLB color buffer. + * + * And on V3D 4.1, we only have one flag for "clear the buffer being + * stored" in the general packet, and a separate packet to clear all + * color TLB buffers. + * + * As a result, we only bother flagging TLB color clears in a general + * packet when we don't have to emit a separate packet to clear all + * TLB color buffers. + */ + bool general_color_clear = (needs_color_clear && + (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) == + (job->resolve & PIPE_CLEAR_COLOR_BUFFERS)); + + uint32_t stores_pending = job->resolve; + + /* For V3D 4.1, use general stores for all TLB stores. + * + * For V3D 3.3, we only use general stores to do raw stores for any + * MSAA surfaces. 
These output UIF tiled images where each 4x MSAA + * pixel is a 2x2 quad, and the format will be that of the + * internal_type/internal_bpp, rather than the format from GL's + * perspective. Non-MSAA surfaces will use + * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. + */ + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + uint32_t bit = PIPE_CLEAR_COLOR0 << i; + if (!(job->resolve & bit)) + continue; + + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf || + (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) { + continue; + } + + store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit, + &stores_pending, general_color_clear); + } + + if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && + !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) { + struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); + if (rsc->separate_stencil) { + if (job->resolve & PIPE_CLEAR_DEPTH) { + store_general(job, cl, job->zsbuf, Z, + PIPE_CLEAR_DEPTH, + &stores_pending, + general_color_clear); + } + + if (job->resolve & PIPE_CLEAR_STENCIL) { + store_general(job, cl, job->zsbuf, STENCIL, + PIPE_CLEAR_STENCIL, + &stores_pending, + general_color_clear); + } + } else { + store_general(job, cl, job->zsbuf, + zs_buffer_from_pipe_bits(job->resolve), + job->resolve & PIPE_CLEAR_DEPTHSTENCIL, + &stores_pending, general_color_clear); + } + } + + if (stores_pending) { +#if V3D_VERSION < 40 + cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { + + store.disable_color_buffer_write = + (~stores_pending >> + PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf; + store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; + store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; + + /* Note that when set this will clear all of the color + * buffers. 
+ */ + store.disable_colour_buffers_clear_on_write = + !needs_color_clear; + store.disable_z_buffer_clear_on_write = + !needs_z_clear; + store.disable_stencil_buffer_clear_on_write = + !needs_s_clear; + }; +#else /* V3D_VERSION >= 40 */ + unreachable("All color buffers should have been stored."); +#endif /* V3D_VERSION >= 40 */ + } else if (needs_color_clear && !general_color_clear) { + /* If we didn't do our color clears in the general packet, + * then emit a packet to clear all the TLB color buffers now. + */ +#if V3D_VERSION < 40 + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +#else /* V3D_VERSION >= 40 */ + cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_all_render_targets = true; + } +#endif /* V3D_VERSION >= 40 */ + } +} + +static void +vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf) +{ + /* Emit the generic list in our indirect state -- the rcl will just + * have pointers into it. + */ + struct vc5_cl *cl = &job->indirect; + vc5_cl_ensure_space(cl, 200, 1); + struct vc5_cl_reloc tile_list_start = cl_get_address(cl); + + if (V3D_VERSION >= 40) { + /* V3D 4.x only requires a single tile coordinates, and + * END_OF_LOADS switches us between loading and rendering. + */ + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + vc5_rcl_emit_loads(job, cl); + + if (V3D_VERSION < 40) { + /* Tile Coordinates triggers the last reload and sets where + * the stores go. There must be one per store packet. + */ + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + } + + /* The binner starts out writing tiles assuming that the initial mode + * is triangles, so make sure that's the case. 
+ */ + cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) { + fmt.data_type = LIST_INDEXED; + fmt.primitive_type = LIST_TRIANGLES; + } + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + vc5_rcl_emit_stores(job, cl); + +#if V3D_VERSION >= 40 + cl_emit(cl, END_OF_TILE_MARKER, end); +#endif + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = cl_get_address(cl); + } +} + +#if V3D_VERSION >= 40 +static void +v3d_setup_render_target(struct vc5_job *job, int cbuf, + uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) +{ + if (!job->cbufs[cbuf]) + return; + + struct vc5_surface *surf = vc5_surface(job->cbufs[cbuf]); + *rt_bpp = surf->internal_bpp; + *rt_type = surf->internal_type; + *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; +} + +#else /* V3D_VERSION < 40 */ + +static void +v3d_emit_z_stencil_config(struct vc5_job *job, struct vc5_surface *surf, + struct vc5_resource *rsc, bool is_separate_stencil) +{ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) { + zs.address = cl_address(rsc->bo, surf->offset); + + if (!is_separate_stencil) { + zs.internal_type = surf->internal_type; + zs.output_image_format = surf->format; + } else { + zs.z_stencil_id = 1; /* Separate stencil */ + } + + zs.padded_height_of_output_image_in_uif_blocks = + surf->padded_height_of_output_image_in_uif_blocks; + + assert(surf->tiling != VC5_TILING_RASTER); + zs.memory_format = surf->tiling; + } + + if (job->resolve & (is_separate_stencil ? + PIPE_CLEAR_STENCIL : + PIPE_CLEAR_DEPTHSTENCIL)) { + rsc->writes++; + } +} +#endif /* V3D_VERSION < 40 */ + +#define div_round_up(a, b) (((a) + (b) - 1) / (b)) /* a / b rounded up; b is evaluated twice, so pass side-effect-free arguments */ + +void +v3dX(emit_rcl)(struct vc5_job *job) +{ + /* The RCL list should be empty.
*/ + assert(!job->rcl.bo); + + vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + job->submit.rcl_start = job->rcl.bo->offset; + vc5_job_add_bo(job, job->rcl.bo); + + int nr_cbufs = 0; + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + if (job->cbufs[i]) + nr_cbufs = i + 1; + } + + /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION + * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are + * optional updates to the previous HW state. + */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION, + config) { +#if V3D_VERSION < 40 + config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH; + config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL; +#else /* V3D_VERSION >= 40 */ + if (job->zsbuf) { + struct vc5_surface *surf = vc5_surface(job->zsbuf); + config.internal_depth_type = surf->internal_type; + } +#endif /* V3D_VERSION >= 40 */ + + /* XXX: Early D/S clear */ + + switch (job->first_ez_state) { + case VC5_EZ_UNDECIDED: + case VC5_EZ_LT_LE: + config.early_z_disable = false; + config.early_z_test_and_update_direction = + EARLY_Z_DIRECTION_LT_LE; + break; + case VC5_EZ_GT_GE: + config.early_z_disable = false; + config.early_z_test_and_update_direction = + EARLY_Z_DIRECTION_GT_GE; + break; + case VC5_EZ_DISABLED: + config.early_z_disable = true; + } + + config.image_width_pixels = job->draw_width; + config.image_height_pixels = job->draw_height; + + config.number_of_render_targets_minus_1 = + MAX2(nr_cbufs, 1) - 1; + + config.multisample_mode_4x = job->msaa; + + config.maximum_bpp_of_all_render_targets = job->internal_bpp; + } + + for (int i = 0; i < nr_cbufs; i++) { + struct pipe_surface *psurf = job->cbufs[i]; + if (!psurf) + continue; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + MAYBE_UNUSED uint32_t config_pad = 0; + uint32_t clear_pad = 0; + + /* XXX: Set the pad for raster. 
*/ + if (surf->tiling == VC5_TILING_UIF_NO_XOR || + surf->tiling == VC5_TILING_UIF_XOR) { + int uif_block_height = vc5_utile_height(rsc->cpp) * 2; + uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) / + uif_block_height); + if (surf->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height < 15) { + config_pad = (surf->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height); + } else { + config_pad = 15; + clear_pad = surf->padded_height_of_output_image_in_uif_blocks; + } + } + +#if V3D_VERSION < 40 + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { + rt.address = cl_address(rsc->bo, surf->offset); + rt.internal_type = surf->internal_type; + rt.output_image_format = surf->format; + rt.memory_format = surf->tiling; + rt.internal_bpp = surf->internal_bpp; + rt.render_target_number = i; + rt.pad = config_pad; + + if (job->resolve & PIPE_CLEAR_COLOR0 << i) + rsc->writes++; + } +#endif /* V3D_VERSION < 40 */ + + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1, + clear) { + clear.clear_color_low_32_bits = job->clear_color[i][0]; + clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; + clear.render_target_number = i; + }; + + if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2, + clear) { + clear.clear_color_mid_low_32_bits = + ((job->clear_color[i][1] >> 24) | + (job->clear_color[i][2] << 8)); + clear.clear_color_mid_high_24_bits = + ((job->clear_color[i][2] >> 24) | + ((job->clear_color[i][3] & 0xffff) << 8)); + clear.render_target_number = i; + }; + } + + if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3, + clear) { + clear.uif_padded_height_in_uif_blocks = clear_pad; + clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; + clear.render_target_number = i; + }; + } + } + +#if 
V3D_VERSION >= 40 + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { + v3d_setup_render_target(job, 0, + &rt.render_target_0_internal_bpp, + &rt.render_target_0_internal_type, + &rt.render_target_0_clamp); + v3d_setup_render_target(job, 1, + &rt.render_target_1_internal_bpp, + &rt.render_target_1_internal_type, + &rt.render_target_1_clamp); + v3d_setup_render_target(job, 2, + &rt.render_target_2_internal_bpp, + &rt.render_target_2_internal_type, + &rt.render_target_2_clamp); + v3d_setup_render_target(job, 3, + &rt.render_target_3_internal_bpp, + &rt.render_target_3_internal_type, + &rt.render_target_3_clamp); + } +#endif + +#if V3D_VERSION < 40 + /* TODO: Don't bother emitting if we don't load/clear Z/S. */ + if (job->zsbuf) { + struct pipe_surface *psurf = job->zsbuf; + struct vc5_surface *surf = vc5_surface(psurf); + struct vc5_resource *rsc = vc5_resource(psurf->texture); + + v3d_emit_z_stencil_config(job, surf, rsc, false); + + /* Emit the separate stencil packet if we have a resource for + * it. The HW will only load/store this buffer if the + * Z/Stencil config doesn't have stencil in its format. + */ + if (surf->separate_stencil) { + v3d_emit_z_stencil_config(job, + vc5_surface(surf->separate_stencil), + rsc->separate_stencil, true); + } + } +#endif /* V3D_VERSION < 40 */ + + /* Ends rendering mode config. */ + cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES, + clear) { + clear.z_clear_value = job->clear_z; + clear.stencil_vg_mask_clear_value = job->clear_s; + }; + + /* Always set initial block size before the first branch, which needs + * to match the value from binning mode config. 
+ */ + cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + uint32_t supertile_w = 1, supertile_h = 1; + + /* If doing multicore binning, we would need to initialize each core's + * tile list here. + */ + cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = cl_address(job->tile_alloc, 0); + } + + cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) { + uint32_t frame_w_in_supertiles, frame_h_in_supertiles; + const uint32_t max_supertiles = 256; + + /* Size up our supertiles until we get under the limit. */ + for (;;) { + frame_w_in_supertiles = div_round_up(job->draw_tiles_x, + supertile_w); + frame_h_in_supertiles = div_round_up(job->draw_tiles_y, + supertile_h); + if (frame_w_in_supertiles * frame_h_in_supertiles < + max_supertiles) { + break; + } + + if (supertile_w < supertile_h) + supertile_w++; + else + supertile_h++; + } + + config.total_frame_width_in_tiles = job->draw_tiles_x; + config.total_frame_height_in_tiles = job->draw_tiles_y; + + config.supertile_width_in_tiles_minus_1 = supertile_w - 1; + config.supertile_height_in_tiles_minus_1 = supertile_h - 1; + + config.total_frame_width_in_supertiles = frame_w_in_supertiles; + config.total_frame_height_in_supertiles = frame_h_in_supertiles; + } + + /* Start by clearing the tile buffer. */ + cl_emit(&job->rcl, TILE_COORDINATES, coords) { + coords.tile_column_number = 0; + coords.tile_row_number = 0; + } + + /* Emit an initial clear of the tile buffers. This is necessary for + * any buffers that should be cleared (since clearing normally happens + * at the *end* of the generic tile list), but it's also nice to clear + * everything so the first tile doesn't inherit any contents from some + * previous frame. + * + * Also, implement the GFXH-1742 workaround. 
There's a race in the HW + * between the RCL updating the TLB's internal type/size and the + * spawning of the QPU instances using the TLB's current internal + * type/size. To make sure the QPUs get the right state,, we need 1 + * dummy store in between internal type/size changes on V3D 3.x, and 2 + * dummy stores on 4.x. + */ +#if V3D_VERSION < 40 + cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } +#else + for (int i = 0; i < 2; i++) { + if (i > 0) + cl_emit(&job->rcl, TILE_COORDINATES, coords); + cl_emit(&job->rcl, END_OF_LOADS, end); + cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + if (i == 0) { + cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = true; + clear.clear_all_render_targets = true; + } + } + cl_emit(&job->rcl, END_OF_TILE_MARKER, end); + } +#endif + + cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); + + vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); + + cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem); + + /* XXX: Use Morton order */ + uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; + uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; + uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; + uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; + + uint32_t max_x_supertile = 0; + uint32_t max_y_supertile = 0; + if (job->draw_max_x != 0 && job->draw_max_y != 0) { + max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; + max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; + } + + for (int y = min_y_supertile; y <= max_y_supertile; y++) { + for (int x = min_x_supertile; x <= max_x_supertile; x++) { + cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } + + cl_emit(&job->rcl, END_OF_RENDERING, end); +} diff --git a/src/gallium/drivers/v3d/v3dx_simulator.c 
b/src/gallium/drivers/v3d/v3dx_simulator.c new file mode 100644 index 00000000000..ee8b6f2b9fd --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_simulator.c @@ -0,0 +1,190 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file v3dx_simulator.c + * + * Implements the actual HW interaction between the GL driver's VC5 simulator and the simulator. + * + * The register headers between V3D versions will have conflicting defines, so + * all register interactions appear in this file and are compiled per V3D version + * we support.
+ */ + +#ifdef USE_V3D_SIMULATOR + +#include "v3d_screen.h" +#include "v3d_context.h" +#include "v3d_simulator_wrapper.h" + +#define HW_REGISTER_RO(x) (x) +#define HW_REGISTER_RW(x) (x) +#if V3D_VERSION >= 41 +#include "libs/core/v3d/registers/4.1.34.0/v3d.h" +#else +#include "libs/core/v3d/registers/3.3.0.0/v3d.h" +#endif + +#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) +#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) + +static void +vc5_flush_l3(struct v3d_hw *v3d) +{ + if (!v3d_hw_has_gca(v3d)) + return; + +#if V3D_VERSION < 40 + uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); + + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); + V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); +#endif +} + +/* Invalidates the L2 cache. This is a read-only cache. */ +static void +vc5_flush_l2(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_L2CACTL, + V3D_CTL_0_L2CACTL_L2CCLR_SET | + V3D_CTL_0_L2CACTL_L2CENA_SET); +} + +/* Invalidates texture L2 cachelines */ +static void +vc5_flush_l2t(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); + V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); + V3D_WRITE(V3D_CTL_0_L2TCACTL, + V3D_CTL_0_L2TCACTL_L2TFLS_SET | + (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); +} + +/* Invalidates the slice caches. These are read-only caches. 
*/ +static void +vc5_flush_slices(struct v3d_hw *v3d) +{ + V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); +} + +static void +vc5_flush_caches(struct v3d_hw *v3d) +{ + vc5_flush_l3(v3d); + vc5_flush_l2(v3d); + vc5_flush_l2t(v3d); + vc5_flush_slices(v3d); +} + +int +v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_get_param *args) +{ + static const uint32_t reg_map[] = { + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0, + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1, + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, + }; + + if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { + args->value = V3D_READ(reg_map[args->param]); + return 0; + } + + fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n", + (long long)args->param); /* report the unrecognized parameter id, not the output field */ + abort(); +} + +void +v3dX(simulator_init_regs)(struct v3d_hw *v3d) +{ +#if V3D_VERSION == 33 + /* Set OVRTMUOUT to match kernel behavior. + * + * This means that the texture sampler uniform configuration's tmu + * output type field is used, instead of using the hardware default + * behavior based on the texture type. If you want the default + * behavior, you can still put "2" in the indirect texture state's + * output_type field. + */ + V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); /* NOTE(review): CTL_1 bit macro written to a CTL_0 register -- confirm this is intended */ +#endif +} + +void +v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs) +{ + /* Completely reset the GMP.
*/ + V3D_WRITE(V3D_GMP_0_CFG, + V3D_GMP_0_CFG_PROTENABLE_SET); + V3D_WRITE(V3D_GMP_0_TABLE_ADDR, gmp_ofs); + V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0); + while (V3D_READ(V3D_GMP_0_STATUS) & + V3D_GMP_0_STATUS_CFG_BUSY_SET) { + ; + } + + vc5_flush_caches(v3d); + + if (submit->qma) { + V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); + V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); + } +#if V3D_VERSION >= 41 + if (submit->qts) { + V3D_WRITE(V3D_CLE_0_CT0QTS, + V3D_CLE_0_CT0QTS_CTQTSEN_SET | + submit->qts); + } +#endif + V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); + V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); + + /* Wait for bin to complete before firing render, as it seems the + * simulator doesn't implement the semaphores. + */ + while (V3D_READ(V3D_CLE_0_CT0CA) != + V3D_READ(V3D_CLE_0_CT0EA)) { + v3d_hw_tick(v3d); + } + + V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); + V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); + + while (V3D_READ(V3D_CLE_0_CT1CA) != + V3D_READ(V3D_CLE_0_CT1EA) || + V3D_READ(V3D_CLE_1_CT1CA) != + V3D_READ(V3D_CLE_1_CT1EA)) { + v3d_hw_tick(v3d); + } +} + +#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c new file mode 100644 index 00000000000..e992796a218 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_state.c @@ -0,0 +1,951 @@ +/* + * Copyright © 2014-2017 Broadcom + * Copyright (C) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_framebuffer.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_half.h" +#include "util/u_helpers.h" + +#include "v3d_context.h" +#include "v3d_tiling.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +static void * +vc5_generic_cso_state_create(const void *src, uint32_t size) +{ + void *dst = calloc(1, size); + if (!dst) + return NULL; + memcpy(dst, src, size); + return dst; +} + +static void +vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso) +{ + free(hwcso); +} + +static void +vc5_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *blend_color) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend_color.f = *blend_color; + for (int i = 0; i < 4; i++) { + vc5->blend_color.hf[i] = + util_float_to_half(blend_color->color[i]); + } + vc5->dirty |= VC5_DIRTY_BLEND_COLOR; +} + +static void +vc5_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref *stencil_ref) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stencil_ref = *stencil_ref; + vc5->dirty |= VC5_DIRTY_STENCIL_REF; +} + +static void +vc5_set_clip_state(struct pipe_context *pctx, + const struct pipe_clip_state *clip) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->clip = *clip; + vc5->dirty |= VC5_DIRTY_CLIP; +} + +static void 
+vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + vc5->dirty |= VC5_DIRTY_SAMPLE_MASK; +} + +static uint16_t +float_to_187_half(float f) +{ + return fui(f) >> 16; +} + +static void * +vc5_create_rasterizer_state(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct vc5_rasterizer_state *so; + + so = CALLOC_STRUCT(vc5_rasterizer_state); + if (!so) + return NULL; + + so->base = *cso; + + /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, + * BCM21553). + */ + so->point_size = MAX2(cso->point_size, .125f); + + if (cso->offset_tri) { + so->offset_units = float_to_187_half(cso->offset_units); + so->offset_factor = float_to_187_half(cso->offset_scale); + } + + return so; +} + +/* Blend state is baked into shaders. */ +static void * +vc5_create_blend_state(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + return vc5_generic_cso_state_create(cso, sizeof(*cso)); +} + +static uint32_t +translate_stencil_op(enum pipe_stencil_op op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: return V3D_STENCIL_OP_KEEP; + case PIPE_STENCIL_OP_ZERO: return V3D_STENCIL_OP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return V3D_STENCIL_OP_REPLACE; + case PIPE_STENCIL_OP_INCR: return V3D_STENCIL_OP_INCR; + case PIPE_STENCIL_OP_DECR: return V3D_STENCIL_OP_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: return V3D_STENCIL_OP_INCWRAP; + case PIPE_STENCIL_OP_DECR_WRAP: return V3D_STENCIL_OP_DECWRAP; + case PIPE_STENCIL_OP_INVERT: return V3D_STENCIL_OP_INVERT; + } + unreachable("bad stencil op"); +} + +static void * +vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct vc5_depth_stencil_alpha_state *so; + + so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->depth.enabled) { + 
switch (cso->depth.func) { + case PIPE_FUNC_LESS: + case PIPE_FUNC_LEQUAL: + so->ez_state = VC5_EZ_LT_LE; + break; + case PIPE_FUNC_GREATER: + case PIPE_FUNC_GEQUAL: + so->ez_state = VC5_EZ_GT_GE; + break; + case PIPE_FUNC_NEVER: + case PIPE_FUNC_EQUAL: + so->ez_state = VC5_EZ_UNDECIDED; + break; + default: + so->ez_state = VC5_EZ_DISABLED; + break; + } + + /* If stencil is enabled and it's not a no-op on either face, then it would + * break EZ updates; the back face uses the same no-op test as the front. + */ + if (cso->stencil[0].enabled && + (cso->stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP || + cso->stencil[0].func != PIPE_FUNC_ALWAYS || + (cso->stencil[1].enabled && + (cso->stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP || + cso->stencil[1].func != PIPE_FUNC_ALWAYS)))) { + so->ez_state = VC5_EZ_DISABLED; + } + } + + const struct pipe_stencil_state *front = &cso->stencil[0]; + const struct pipe_stencil_state *back = &cso->stencil[1]; + + if (front->enabled) { + v3dx_pack(&so->stencil_front, STENCIL_CONFIG, config) { + config.front_config = true; + /* If !back->enabled, then the front values should be + * used for both front and back-facing primitives.
+ */ + config.back_config = !back->enabled; + + config.stencil_write_mask = front->writemask; + config.stencil_test_mask = front->valuemask; + + config.stencil_test_function = front->func; + config.stencil_pass_op = + translate_stencil_op(front->zpass_op); + config.depth_test_fail_op = + translate_stencil_op(front->zfail_op); + config.stencil_test_fail_op = + translate_stencil_op(front->fail_op); + } + } + if (back->enabled) { + v3dx_pack(&so->stencil_back, STENCIL_CONFIG, config) { + config.front_config = false; + config.back_config = true; + + config.stencil_write_mask = back->writemask; + config.stencil_test_mask = back->valuemask; + + config.stencil_test_function = back->func; + config.stencil_pass_op = + translate_stencil_op(back->zpass_op); + config.depth_test_fail_op = + translate_stencil_op(back->zfail_op); + config.stencil_test_fail_op = + translate_stencil_op(back->fail_op); + } + } + + return so; +} + +static void +vc5_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->stipple = *stipple; + vc5->dirty |= VC5_DIRTY_STIPPLE; +} + +static void +vc5_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct vc5_context *vc5 = vc5_context(pctx); + + vc5->scissor = *scissor; + vc5->dirty |= VC5_DIRTY_SCISSOR; +} + +static void +vc5_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewport) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->viewport = *viewport; + vc5->dirty |= VC5_DIRTY_VIEWPORT; +} + +static void +vc5_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf; + + util_set_vertex_buffers_mask(so->vb, 
&so->enabled_mask, vb, + start_slot, count); + so->count = util_last_bit(so->enabled_mask); + + vc5->dirty |= VC5_DIRTY_VTXBUF; +} + +static void +vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->blend = hwcso; + vc5->dirty |= VC5_DIRTY_BLEND; +} + +static void +vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->rasterizer = hwcso; + vc5->dirty |= VC5_DIRTY_RASTERIZER; +} + +static void +vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->zsa = hwcso; + vc5->dirty |= VC5_DIRTY_ZSA; +} + +static void * +vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj); + + if (!so) + return NULL; + + memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + so->num_elements = num_elements; + + for (int i = 0; i < so->num_elements; i++) { + const struct pipe_vertex_element *elem = &elements[i]; + const struct util_format_description *desc = + util_format_description(elem->src_format); + uint32_t r_size = desc->channel[0].size; + + const uint32_t size = + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); + + v3dx_pack(&so->attrs[i * size], + GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* vec_size == 0 means 4 */ + attr.vec_size = desc->nr_channels & 3; + attr.signed_int_type = (desc->channel[0].type == + UTIL_FORMAT_TYPE_SIGNED); + + attr.normalized_int_type = desc->channel[0].normalized; + attr.read_as_int_uint = desc->channel[0].pure_integer; + attr.instance_divisor = MIN2(elem->instance_divisor, + 0xffff); + + switch (desc->channel[0].type) { + case UTIL_FORMAT_TYPE_FLOAT: + if (r_size == 32) { + attr.type = ATTRIBUTE_FLOAT; + } else { + assert(r_size == 16); + attr.type = 
ATTRIBUTE_HALF_FLOAT; + } + break; + + case UTIL_FORMAT_TYPE_SIGNED: + case UTIL_FORMAT_TYPE_UNSIGNED: + switch (r_size) { + case 32: + attr.type = ATTRIBUTE_INT; + break; + case 16: + attr.type = ATTRIBUTE_SHORT; + break; + case 10: + attr.type = ATTRIBUTE_INT2_10_10_10; + break; + case 8: + attr.type = ATTRIBUTE_BYTE; + break; + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + attr.type = ATTRIBUTE_BYTE; + abort(); + } + break; + + default: + fprintf(stderr, + "format %s unsupported\n", + desc->name); + abort(); + } + } + } + + /* Set up the default attribute values in case any of the vertex + * elements use them. + */ + so->default_attribute_values = vc5_bo_alloc(vc5->screen, + VC5_MAX_ATTRIBUTES * + 4 * sizeof(float), + "default attributes"); + uint32_t *attrs = vc5_bo_map(so->default_attribute_values); + for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { + attrs[i * 4 + 0] = 0; + attrs[i * 4 + 1] = 0; + attrs[i * 4 + 2] = 0; + if (i < so->num_elements && + util_format_is_pure_integer(so->pipe[i].src_format)) { + attrs[i * 4 + 3] = 1; + } else { + attrs[i * 4 + 3] = fui(1.0); + } + } + + return so; +} + +static void +vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + vc5->vtx = hwcso; + vc5->dirty |= VC5_DIRTY_VTXSTATE; +} + +static void +vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, + const struct pipe_constant_buffer *cb) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader]; + + util_copy_constant_buffer(&so->cb[index], cb); + + /* Note that the state tracker can unbind constant buffers by + * passing NULL here. 
+ */ + if (unlikely(!cb)) { + so->enabled_mask &= ~(1 << index); + so->dirty_mask &= ~(1 << index); + return; + } + + so->enabled_mask |= 1 << index; + so->dirty_mask |= 1 << index; + vc5->dirty |= VC5_DIRTY_CONSTBUF; +} + +static void +vc5_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *framebuffer) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct pipe_framebuffer_state *cso = &vc5->framebuffer; + + vc5->job = NULL; + + util_copy_framebuffer_state(cso, framebuffer); + + vc5->swap_color_rb = 0; + vc5->blend_dst_alpha_one = 0; + for (int i = 0; i < vc5->framebuffer.nr_cbufs; i++) { + struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; + if (!cbuf) + continue; + + const struct util_format_description *desc = + util_format_description(cbuf->format); + + /* For BGRA8 formats (DRI window system default format), we + * need to swap R and B, since the HW's format is RGBA8. + */ + if (desc->swizzle[0] == PIPE_SWIZZLE_Z && + cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) { + vc5->swap_color_rb |= 1 << i; + } + + if (desc->swizzle[3] == PIPE_SWIZZLE_1) + vc5->blend_dst_alpha_one |= 1 << i; + } + + vc5->dirty |= VC5_DIRTY_FRAMEBUFFER; +} + +static struct vc5_texture_stateobj * +vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader) +{ + switch (shader) { + case PIPE_SHADER_FRAGMENT: + vc5->dirty |= VC5_DIRTY_FRAGTEX; + return &vc5->fragtex; + break; + case PIPE_SHADER_VERTEX: + vc5->dirty |= VC5_DIRTY_VERTTEX; + return &vc5->verttex; + break; + default: + fprintf(stderr, "Unknown shader target %d\n", shader); + abort(); + } +} + +static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +{ + switch (pipe_wrap) { + case PIPE_TEX_WRAP_REPEAT: + return 0; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return 1; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return 2; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return 3; + case PIPE_TEX_WRAP_CLAMP: + return (using_nearest ? 
1 : 3); + default: + unreachable("Unknown wrap mode"); + } +} + + +static void * +vc5_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + MAYBE_UNUSED struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + +#if V3D_VERSION >= 40 + so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), + "sampler"); + void *map = vc5_bo_map(so->bo); + + v3dx_pack(map, SAMPLER_STATE, sampler) { + sampler.wrap_i_border = false; + + sampler.wrap_s = translate_wrap(cso->wrap_s, either_nearest); + sampler.wrap_t = translate_wrap(cso->wrap_t, either_nearest); + sampler.wrap_r = translate_wrap(cso->wrap_r, either_nearest); + + sampler.fixed_bias = cso->lod_bias; + sampler.depth_compare_function = cso->compare_func; + + sampler.min_filter_nearest = + cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; + sampler.mag_filter_nearest = + cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; + sampler.mip_filter_nearest = + cso->min_mip_filter != PIPE_TEX_MIPFILTER_LINEAR; + + sampler.min_level_of_detail = MIN2(MAX2(0, cso->min_lod), + 15); + sampler.max_level_of_detail = MIN2(cso->max_lod, 15); + + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + sampler.min_level_of_detail = 0; + sampler.max_level_of_detail = 0; + } + + if (cso->max_anisotropy) { + sampler.anisotropy_enable = true; + + if (cso->max_anisotropy > 8) + sampler.maximum_anisotropy = 3; + else if (cso->max_anisotropy > 4) + sampler.maximum_anisotropy = 2; + else if (cso->max_anisotropy > 2) + sampler.maximum_anisotropy = 1; + } + + sampler.border_colour_mode = V3D_BORDER_COLOUR_FOLLOWS; + /* XXX: The border colour field is in the TMU blending format + * (32, f16, or i16), and we need to customize it based on + * that. 
+ * + * XXX: for compat alpha formats, we need the alpha field to + * be in the red channel. + */ + sampler.border_colour_red = + util_float_to_half(cso->border_color.f[0]); + sampler.border_colour_green = + util_float_to_half(cso->border_color.f[1]); + sampler.border_colour_blue = + util_float_to_half(cso->border_color.f[2]); + sampler.border_colour_alpha = + util_float_to_half(cso->border_color.f[3]); + } + +#else /* V3D_VERSION < 40 */ + v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { + p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); + p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); + p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); + } + + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + tex.depth_compare_function = cso->compare_func; + tex.fixed_bias = cso->lod_bias; + } +#endif /* V3D_VERSION < 40 */ + return so; +} + +static void +vc5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + + assert(start == 0); + unsigned i; + unsigned new_nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) + new_nr = i + 1; + stage_tex->samplers[i] = hwcso[i]; + } + + for (; i < stage_tex->num_samplers; i++) { + stage_tex->samplers[i] = NULL; + } + + stage_tex->num_samplers = new_nr; +} + +static void +vc5_sampler_state_delete(struct pipe_context *pctx, + void *hwcso) +{ + struct pipe_sampler_state *psampler = hwcso; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + + vc5_bo_unreference(&sampler->bo); + free(psampler); +} + +#if V3D_VERSION >= 40 +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case 
PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} +#endif + +static struct pipe_sampler_view * +vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_screen *screen = vc5->screen; + struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view); + struct vc5_resource *rsc = vc5_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + + pipe_reference(NULL, &prsc->reference); + + /* Compute the sampler view's swizzle up front. This will be plugged + * into either the sampler (for 16-bit returns) or the shader's + * texture key (for 32) + */ + uint8_t view_swizzle[4] = { + cso->swizzle_r, + cso->swizzle_g, + cso->swizzle_b, + cso->swizzle_a + }; + const uint8_t *fmt_swizzle = + vc5_get_format_swizzle(&screen->devinfo, so->base.format); + util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle); + + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + +#if V3D_VERSION >= 40 + so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), + "sampler"); + void *map = vc5_bo_map(so->bo); + + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { +#else /* V3D_VERSION < 40 */ + v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { +#endif + + tex.image_width = prsc->width0 * msaa_scale; + tex.image_height = prsc->height0 * msaa_scale; + +#if V3D_VERSION >= 40 + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). 
+ */ + if (prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY) { + tex.image_height = tex.image_width >> 14; + } +#endif + + if (prsc->target == PIPE_TEXTURE_3D) { + tex.image_depth = prsc->depth0; + } else { + tex.image_depth = (cso->u.tex.last_layer - + cso->u.tex.first_layer) + 1; + } + + tex.srgb = util_format_is_srgb(cso->format); + + tex.base_level = cso->u.tex.first_level; +#if V3D_VERSION >= 40 + tex.max_level = cso->u.tex.last_level; + /* Note that we don't have a job to reference the texture's sBO + * at state create time, so any time this sampler view is used + * we need to add the texture to the job. + */ + tex.texture_base_pointer = cl_address(NULL, + rsc->bo->offset + + rsc->slices[0].offset), + + tex.swizzle_r = translate_swizzle(so->swizzle[0]); + tex.swizzle_g = translate_swizzle(so->swizzle[1]); + tex.swizzle_b = translate_swizzle(so->swizzle[2]); + tex.swizzle_a = translate_swizzle(so->swizzle[3]); +#endif + tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; + + if (prsc->nr_samples > 1 && V3D_VERSION < 40) { + /* Using texture views to reinterpret formats on our + * MSAA textures won't work, because we don't lay out + * the bits in memory as it's expected -- for example, + * RGBA8 and RGB10_A2 are compatible in the + * ARB_texture_view spec, but in HW we lay them out as + * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now + * to catch failures. + * + * We explicitly allow remapping S8Z24 to RGBA8888 for + * vc5_blit.c's stencil blits. 
+ */ + assert((util_format_linear(cso->format) == + util_format_linear(prsc->format)) || + (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM && + cso->format == PIPE_FORMAT_R8G8B8A8_UNORM)); + uint32_t output_image_format = + vc5_get_rt_format(&screen->devinfo, cso->format); + uint32_t internal_type; + uint32_t internal_bpp; + vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, + output_image_format, + &internal_type, + &internal_bpp); + + switch (internal_type) { + case V3D_INTERNAL_TYPE_8: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8; + break; + case V3D_INTERNAL_TYPE_16F: + tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F; + break; + default: + unreachable("Bad MSAA texture type"); + } + + /* sRGB was stored in the tile buffer as linear and + * would have been encoded to sRGB on resolved tile + * buffer store. Note that this means we would need + * shader code if we wanted to read an MSAA sRGB + * texture without sRGB decode. + */ + tex.srgb = false; + } else { + tex.texture_type = vc5_get_tex_format(&screen->devinfo, + cso->format); + } + + /* Since other platform devices may produce UIF images even + * when they're not big enough for V3D to assume they're UIF, + * we force images with level 0 as UIF to be always treated + * that way. 
+ */ + tex.level_0_is_strictly_uif = (rsc->slices[0].tiling == + VC5_TILING_UIF_XOR || + rsc->slices[0].tiling == + VC5_TILING_UIF_NO_XOR); + tex.level_0_xor_enable = (rsc->slices[0].tiling == + VC5_TILING_UIF_XOR); + + if (tex.level_0_is_strictly_uif) + tex.level_0_ub_pad = rsc->slices[0].ub_pad; + +#if V3D_VERSION >= 40 + if (tex.uif_xor_disable || + tex.level_0_is_strictly_uif) { + tex.extended = true; + } +#endif /* V3D_VERSION >= 40 */ + }; + + return &so->base; +} + +static void +vc5_sampler_view_destroy(struct pipe_context *pctx, + struct pipe_sampler_view *psview) +{ + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + + vc5_bo_unreference(&sview->bo); + pipe_resource_reference(&psview->texture, NULL); + free(psview); +} + +static void +vc5_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); + unsigned i; + unsigned new_nr = 0; + + assert(start == 0); + + for (i = 0; i < nr; i++) { + if (views[i]) + new_nr = i + 1; + pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); + } + + for (; i < stage_tex->num_textures; i++) { + pipe_sampler_view_reference(&stage_tex->textures[i], NULL); + } + + stage_tex->num_textures = new_nr; +} + +static struct pipe_stream_output_target * +vc5_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +vc5_stream_output_target_destroy(struct pipe_context 
*pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + free(target); +} + +static void +vc5_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct vc5_context *ctx = vc5_context(pctx); + struct vc5_streamout_stateobj *so = &ctx->streamout; + unsigned i; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (i = 0; i < num_targets; i++) + pipe_so_target_reference(&so->targets[i], targets[i]); + + for (; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; + + ctx->dirty |= VC5_DIRTY_STREAMOUT; +} + +void +v3dX(state_init)(struct pipe_context *pctx) +{ + pctx->set_blend_color = vc5_set_blend_color; + pctx->set_stencil_ref = vc5_set_stencil_ref; + pctx->set_clip_state = vc5_set_clip_state; + pctx->set_sample_mask = vc5_set_sample_mask; + pctx->set_constant_buffer = vc5_set_constant_buffer; + pctx->set_framebuffer_state = vc5_set_framebuffer_state; + pctx->set_polygon_stipple = vc5_set_polygon_stipple; + pctx->set_scissor_states = vc5_set_scissor_states; + pctx->set_viewport_states = vc5_set_viewport_states; + + pctx->set_vertex_buffers = vc5_set_vertex_buffers; + + pctx->create_blend_state = vc5_create_blend_state; + pctx->bind_blend_state = vc5_blend_state_bind; + pctx->delete_blend_state = vc5_generic_cso_state_delete; + + pctx->create_rasterizer_state = vc5_create_rasterizer_state; + pctx->bind_rasterizer_state = vc5_rasterizer_state_bind; + pctx->delete_rasterizer_state = vc5_generic_cso_state_delete; + + pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state; + pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind; + pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete; + + pctx->create_vertex_elements_state = vc5_vertex_state_create; + pctx->delete_vertex_elements_state = 
vc5_generic_cso_state_delete; + pctx->bind_vertex_elements_state = vc5_vertex_state_bind; + + pctx->create_sampler_state = vc5_create_sampler_state; + pctx->delete_sampler_state = vc5_sampler_state_delete; + pctx->bind_sampler_states = vc5_sampler_states_bind; + + pctx->create_sampler_view = vc5_create_sampler_view; + pctx->sampler_view_destroy = vc5_sampler_view_destroy; + pctx->set_sampler_views = vc5_set_sampler_views; + + pctx->create_stream_output_target = vc5_create_stream_output_target; + pctx->stream_output_target_destroy = vc5_stream_output_target_destroy; + pctx->set_stream_output_targets = vc5_set_stream_output_targets; +} diff --git a/src/gallium/drivers/vc5/.editorconfig b/src/gallium/drivers/vc5/.editorconfig deleted file mode 100644 index 5a9f3c041a4..00000000000 --- a/src/gallium/drivers/vc5/.editorconfig +++ /dev/null @@ -1,3 +0,0 @@ -[*.{c,h,cpp}] -indent_style = space -indent_size = 8 diff --git a/src/gallium/drivers/vc5/Automake.inc b/src/gallium/drivers/vc5/Automake.inc deleted file mode 100644 index 612bc21615e..00000000000 --- a/src/gallium/drivers/vc5/Automake.inc +++ /dev/null @@ -1,14 +0,0 @@ -if HAVE_GALLIUM_V3D - -TARGET_DRIVERS += v3d -TARGET_CPPFLAGS += -DGALLIUM_V3D -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/vc5/drm/libv3ddrm.la \ - $(top_builddir)/src/gallium/drivers/vc5/libv3d.la \ - $(top_builddir)/src/broadcom/libbroadcom.la - -if !HAVE_GALLIUM_VC4 -TARGET_LIB_DEPS += $(top_builddir)/src/broadcom/cle/libbroadcom_cle.la -endif - -endif diff --git a/src/gallium/drivers/vc5/Makefile.am b/src/gallium/drivers/vc5/Makefile.am deleted file mode 100644 index 2b4c364c24e..00000000000 --- a/src/gallium/drivers/vc5/Makefile.am +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright © 2014 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the 
rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_builddir)/src/compiler/nir \ - -I$(top_builddir)/src/broadcom \ - $(LIBDRM_CFLAGS) \ - $(V3D_SIMULATOR_CFLAGS) \ - $(GALLIUM_DRIVER_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $() - -noinst_LTLIBRARIES = \ - libv3d.la \ - libv3d_v33.la \ - libv3d_v41.la \ - $() - -libv3d_v33_la_SOURCES = $(V3D_PER_VERSION_SOURCES) -libv3d_v33_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=33 - -libv3d_v41_la_SOURCES = $(V3D_PER_VERSION_SOURCES) -libv3d_v41_la_CFLAGS = $(AM_CFLAGS) -DV3D_VERSION=41 - -libv3d_la_SOURCES = $(C_SOURCES) - -libv3d_la_LDFLAGS = \ - $(V3D_SIMULATOR_LIBS) \ - $(NULL) -libv3d_la_LIBADD = \ - libv3d_v33.la \ - libv3d_v41.la \ - $() - -EXTRA_DIST = meson.build diff --git a/src/gallium/drivers/vc5/Makefile.sources b/src/gallium/drivers/vc5/Makefile.sources deleted file mode 100644 index 36fcc0b90be..00000000000 --- a/src/gallium/drivers/vc5/Makefile.sources +++ /dev/null @@ -1,36 +0,0 @@ -C_SOURCES := \ - vc5_blit.c \ - vc5_bufmgr.c \ - vc5_bufmgr.h \ - vc5_cl.c \ - vc5_cl.h \ - vc5_context.c \ - vc5_context.h \ - 
vc5_fence.c \ - vc5_formats.c \ - vc5_format_table.h \ - vc5_job.c \ - vc5_program.c \ - vc5_query.c \ - vc5_resource.c \ - vc5_resource.h \ - vc5_screen.c \ - vc5_screen.h \ - vc5_simulator.c \ - vc5_simulator_wrapper.cpp \ - vc5_simulator_wrapper.h \ - vc5_tiling.c \ - vc5_tiling.h \ - vc5_uniforms.c \ - $() - -V3D_PER_VERSION_SOURCES = \ - v3dx_context.h \ - v3dx_format_table.c \ - v3dx_job.c \ - v3dx_simulator.c \ - vc5_draw.c \ - vc5_emit.c \ - vc5_rcl.c \ - vc5_state.c \ - $() diff --git a/src/gallium/drivers/vc5/meson.build b/src/gallium/drivers/vc5/meson.build deleted file mode 100644 index 63460dc443f..00000000000 --- a/src/gallium/drivers/vc5/meson.build +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright © 2017 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -files_libv3d = files( - 'vc5_blit.c', - 'vc5_bufmgr.c', - 'vc5_bufmgr.h', - 'vc5_cl.c', - 'vc5_cl.h', - 'vc5_context.c', - 'vc5_context.h', - 'vc5_fence.c', - 'vc5_formats.c', - 'vc5_job.c', - 'vc5_program.c', - 'vc5_query.c', - 'vc5_resource.c', - 'vc5_resource.h', - 'vc5_screen.c', - 'vc5_screen.h', - 'vc5_simulator.c', - 'vc5_simulator_wrapper.cpp', - 'vc5_tiling.c', - 'vc5_tiling.h', - 'vc5_uniforms.c', -) - -files_per_version = files( - 'v3dx_format_table.c', - 'v3dx_job.c', - 'v3dx_simulator.c', - 'vc5_draw.c', - 'vc5_emit.c', - 'vc5_rcl.c', - 'vc5_state.c', -) - -v3dv3_c_args = [] -dep_v3dv3 = dependency('v3dv3') -if dep_v3dv3.found() - v3dv3_c_args = '-DUSE_V3D_SIMULATOR' -endif - -v3d_versions = ['33', '41'] - -per_version_libs = [] -foreach ver : v3d_versions - per_version_libs += static_library( - 'v3d-v' + ver, - [files_per_version, v3d_xml_pack, nir_opcodes_h, nir_builder_opcodes_h], - include_directories : [ - inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, - inc_gallium_drivers, inc_drm_uapi, - ], - c_args : [c_vis_args, v3dv3_c_args, '-DV3D_VERSION=' + ver], - cpp_args : [cpp_vis_args], - dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind], -) - -endforeach - -libv3d = static_library( - 'v3d', - [files_libv3d, v3d_xml_pack], - include_directories : [ - inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, - inc_gallium_drivers, inc_drm_uapi, - ], - c_args : [c_vis_args, v3dv3_c_args], - cpp_args : [cpp_vis_args, v3dv3_c_args], - dependencies : [dep_v3dv3, dep_libdrm, dep_valgrind, idep_nir_headers], - link_with: per_version_libs, -) - -driver_v3d = declare_dependency( - compile_args : '-DGALLIUM_V3D', - link_with : [libv3d, libv3dwinsys, libbroadcom_cle, libbroadcom_v3d], - dependencies : idep_nir, -) diff --git a/src/gallium/drivers/vc5/v3dx_context.h b/src/gallium/drivers/vc5/v3dx_context.h deleted file mode 100644 index 538aed68be0..00000000000 --- a/src/gallium/drivers/vc5/v3dx_context.h +++ /dev/null @@ -1,47 
+0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* This file generates the per-v3d-version function prototypes. It must only - * be included from vc5_context.h. 
- */ - -struct v3d_hw; -struct vc5_format; - -void v3dX(emit_state)(struct pipe_context *pctx); -void v3dX(emit_rcl)(struct vc5_job *job); -void v3dX(draw_init)(struct pipe_context *pctx); -void v3dX(state_init)(struct pipe_context *pctx); - -void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job); - -void v3dX(simulator_init_regs)(struct v3d_hw *v3d); -int v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, - struct drm_v3d_get_param *args); -void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs); -const struct vc5_format *v3dX(get_format_desc)(enum pipe_format f); -void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, - uint32_t *type, - uint32_t *bpp); diff --git a/src/gallium/drivers/vc5/v3dx_format_table.c b/src/gallium/drivers/vc5/v3dx_format_table.c deleted file mode 100644 index cc356fc3811..00000000000 --- a/src/gallium/drivers/vc5/v3dx_format_table.c +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright © 2014-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_format.h" - -#include "vc5_context.h" -#include "broadcom/cle/v3dx_pack.h" -#include "broadcom/common/v3d_macros.h" -#include "vc5_format_table.h" - -#define SWIZ(x,y,z,w) { \ - PIPE_SWIZZLE_##x, \ - PIPE_SWIZZLE_##y, \ - PIPE_SWIZZLE_##z, \ - PIPE_SWIZZLE_##w \ -} - -#define FORMAT(pipe, rt, tex, swiz, return_size, return_channels) \ - [PIPE_FORMAT_##pipe] = { \ - true, \ - V3D_OUTPUT_IMAGE_FORMAT_##rt, \ - TEXTURE_DATA_FORMAT_##tex, \ - swiz, \ - return_size, \ - return_channels, \ - } - -#define SWIZ_X001 SWIZ(X, 0, 0, 1) -#define SWIZ_XY01 SWIZ(X, Y, 0, 1) -#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) -#define SWIZ_XYZW SWIZ(X, Y, Z, W) -#define SWIZ_YZWX SWIZ(Y, Z, W, X) -#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) -#define SWIZ_ZYXW SWIZ(Z, Y, X, W) -#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) -#define SWIZ_XXXY SWIZ(X, X, X, Y) -#define SWIZ_XXX1 SWIZ(X, X, X, 1) -#define SWIZ_XXXX SWIZ(X, X, X, X) -#define SWIZ_000X SWIZ(0, 0, 0, X) - -static const struct vc5_format format_table[] = { - FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, 0), - FORMAT(B8G8R8X8_UNORM, RGBA8, RGBA8, SWIZ_ZYX1, 16, 0), - FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, 0), - FORMAT(B8G8R8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYX1, 16, 0), - FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), - FORMAT(R8G8B8X8_UNORM, RGBA8, RGBA8, SWIZ_XYZ1, 16, 0), - FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), - FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), - FORMAT(R10G10B10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, 0), - FORMAT(R10G10B10A2_UINT, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, 0), - - FORMAT(A4B4G4R4_UNORM, ABGR4444, RGBA4, SWIZ_XYZW, 16, 0), - - FORMAT(A1B5G5R5_UNORM, ABGR1555, 
RGB5_A1, SWIZ_XYZW, 16, 0), - FORMAT(X1B5G5R5_UNORM, ABGR1555, RGB5_A1, SWIZ_XYZ1, 16, 0), - FORMAT(B5G6R5_UNORM, BGR565, RGB565, SWIZ_XYZ1, 16, 0), - - FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, 0), - FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, 0), - FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, 0), - FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, 0), - - FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, 1), - FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, 1), - FORMAT(R16_FLOAT, R16F, R16F, SWIZ_X001, 16, 0), - FORMAT(R32_FLOAT, R32F, R32F, SWIZ_X001, 32, 1), - - FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, 2), - FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, 2), - FORMAT(R16G16_FLOAT, RG16F, RG16F, SWIZ_XY01, 16, 0), - FORMAT(R32G32_FLOAT, RG32F, RG32F, SWIZ_XY01, 32, 2), - - FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, 4), - FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, 4), - FORMAT(R16G16B16A16_FLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, 0), - FORMAT(R32G32B32A32_FLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, 4), - - /* If we don't have L/A/LA16, mesa/st will fall back to RGBA16. 
*/ - FORMAT(L16_UNORM, NO, R16, SWIZ_XXX1, 32, 1), - FORMAT(L16_SNORM, NO, R16_SNORM, SWIZ_XXX1, 32, 1), - FORMAT(I16_UNORM, NO, R16, SWIZ_XXXX, 32, 1), - FORMAT(I16_SNORM, NO, R16_SNORM, SWIZ_XXXX, 32, 1), - FORMAT(A16_UNORM, NO, R16, SWIZ_000X, 32, 1), - FORMAT(A16_SNORM, NO, R16_SNORM, SWIZ_000X, 32, 1), - FORMAT(L16A16_UNORM, NO, RG16, SWIZ_XXXY, 32, 2), - FORMAT(L16A16_SNORM, NO, RG16_SNORM, SWIZ_XXXY, 32, 2), - - FORMAT(A8_UNORM, NO, R8, SWIZ_000X, 16, 0), - FORMAT(L8_UNORM, NO, R8, SWIZ_XXX1, 16, 0), - FORMAT(I8_UNORM, NO, R8, SWIZ_XXXX, 16, 0), - FORMAT(L8A8_UNORM, NO, RG8, SWIZ_XXXY, 16, 0), - - FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, 0), - FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, 0), - FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, 0), - FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, 0), - FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, 0), - FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, 0), - - FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, 0), - FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, 0), - FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, 0), - FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, 0), - FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, 0), - FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, 0), - - FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, 1), - FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, 1), - FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, 2), - FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, 2), - FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, 4), - FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, 4), - - FORMAT(A8_SINT, R8I, R8I, SWIZ_000X, 16, 0), - FORMAT(A8_UINT, R8UI, R8UI, SWIZ_000X, 16, 0), - FORMAT(A16_SINT, R16I, R16I, SWIZ_000X, 16, 0), - FORMAT(A16_UINT, R16UI, R16UI, SWIZ_000X, 16, 0), - FORMAT(A32_SINT, R32I, R32I, SWIZ_000X, 32, 1), - FORMAT(A32_UINT, R32UI, R32UI, SWIZ_000X, 32, 1), - - FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZW, 
16, 0), - FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZW, 16, 0), - -#if V3D_VERSION >= 40 - FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, S8, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z32_FLOAT, D32F, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z16_UNORM, D16, DEPTH_COMP16,SWIZ_XXXX, 32, 1), - - /* Pretend we support this, but it'll be separate Z32F depth and S8. */ - FORMAT(Z32_FLOAT_S8X24_UINT, D32F, R32F, SWIZ_XXXX, 32, 1), -#else - FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1), - - /* Pretend we support this, but it'll be separate Z32F depth and S8. */ - FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), -#endif - - FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), - FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), - FORMAT(ETC2_RGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), - FORMAT(ETC2_SRGB8A1, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, 0), - FORMAT(ETC2_RGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), - FORMAT(ETC2_SRGBA8, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, 0), - FORMAT(ETC2_R11_UNORM, NO, R11_EAC, SWIZ_X001, 16, 0), - FORMAT(ETC2_R11_SNORM, NO, SIGNED_R11_EAC, SWIZ_X001, 16, 0), - FORMAT(ETC2_RG11_UNORM, NO, RG11_EAC, SWIZ_XY01, 16, 0), - FORMAT(ETC2_RG11_SNORM, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, 0), - - FORMAT(DXT1_RGB, NO, BC1, SWIZ_XYZ1, 16, 0), - FORMAT(DXT3_RGBA, NO, BC2, SWIZ_XYZ1, 16, 0), - FORMAT(DXT5_RGBA, NO, BC3, SWIZ_XYZ1, 16, 0), -}; - -const struct vc5_format * -v3dX(get_format_desc)(enum pipe_format f) -{ - if (f < ARRAY_SIZE(format_table) && format_table[f].present) - return &format_table[f]; - else - return NULL; -} - -void 
-v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, - uint32_t *type, - uint32_t *bpp) -{ - switch (format) { - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_RGBX8: -#endif - case V3D_OUTPUT_IMAGE_FORMAT_RGB8: - case V3D_OUTPUT_IMAGE_FORMAT_RG8: - case V3D_OUTPUT_IMAGE_FORMAT_R8: - case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444: - case V3D_OUTPUT_IMAGE_FORMAT_BGR565: - case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555: - *type = V3D_INTERNAL_TYPE_8; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I: - case V3D_OUTPUT_IMAGE_FORMAT_RG8I: - case V3D_OUTPUT_IMAGE_FORMAT_R8I: - *type = V3D_INTERNAL_TYPE_8I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI: - case V3D_OUTPUT_IMAGE_FORMAT_RG8UI: - case V3D_OUTPUT_IMAGE_FORMAT_R8UI: - *type = V3D_INTERNAL_TYPE_8UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: - case V3D_OUTPUT_IMAGE_FORMAT_SRGB: - case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: - case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8: -#endif - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: - /* Note that sRGB RTs are stored in the tile buffer at 16F, - * and the conversion to sRGB happens at tilebuffer - * load/store. - */ - *type = V3D_INTERNAL_TYPE_16F; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG16F: - case V3D_OUTPUT_IMAGE_FORMAT_R16F: - *type = V3D_INTERNAL_TYPE_16F; - /* Use 64bpp to make sure the TLB doesn't throw away the alpha - * channel before alpha test happens. 
- */ - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I: - *type = V3D_INTERNAL_TYPE_16I; - *bpp = V3D_INTERNAL_BPP_64; - break; - case V3D_OUTPUT_IMAGE_FORMAT_RG16I: - case V3D_OUTPUT_IMAGE_FORMAT_R16I: - *type = V3D_INTERNAL_TYPE_16I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI: - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI: - *type = V3D_INTERNAL_TYPE_16UI; - *bpp = V3D_INTERNAL_BPP_64; - break; - case V3D_OUTPUT_IMAGE_FORMAT_RG16UI: - case V3D_OUTPUT_IMAGE_FORMAT_R16UI: - *type = V3D_INTERNAL_TYPE_16UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_128; - break; - case V3D_OUTPUT_IMAGE_FORMAT_RG32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_64; - break; - case V3D_OUTPUT_IMAGE_FORMAT_R32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_128; - break; - case V3D_OUTPUT_IMAGE_FORMAT_RG32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_64; - break; - case V3D_OUTPUT_IMAGE_FORMAT_R32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_128; - break; - case V3D_OUTPUT_IMAGE_FORMAT_RG32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_64; - break; - case V3D_OUTPUT_IMAGE_FORMAT_R32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_32; - break; - - default: - /* Provide some default values, as we'll be called at RB - * creation time, even if an RB with this format isn't - * supported. 
- */ - *type = V3D_INTERNAL_TYPE_8; - *bpp = V3D_INTERNAL_BPP_32; - break; - } -} diff --git a/src/gallium/drivers/vc5/v3dx_job.c b/src/gallium/drivers/vc5/v3dx_job.c deleted file mode 100644 index ca3831c75bb..00000000000 --- a/src/gallium/drivers/vc5/v3dx_job.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file v3dx_job.c - * - * V3D version-specific functions for submitting VC5 render jobs to the - * kernel. 
- */ - -#include "vc5_context.h" -#include "broadcom/cle/v3dx_pack.h" - -void v3dX(bcl_epilogue)(struct vc5_context *vc5, struct vc5_job *job) -{ - vc5_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(OCCLUSION_QUERY_COUNTER) + -#if V3D_VERSION >= 41 - cl_packet_length(TRANSFORM_FEEDBACK_SPECS) + -#endif - cl_packet_length(INCREMENT_SEMAPHORE) + - cl_packet_length(FLUSH_ALL_STATE)); - - if (job->oq_enabled) { - /* Disable the OQ at the end of the CL, so that the - * draw calls at the start of the CL don't inherit the - * OQ counter. - */ - cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); - } - - /* Disable TF at the end of the CL, so that the next job to be - * run doesn't start out trying to write TF primitives. On - * V3D 3.x, it's only the TF primitive mode that triggers TF - * writes. - */ -#if V3D_VERSION >= 41 - if (job->tf_enabled) { - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { - tfe.enable = false; - }; - } -#endif /* V3D_VERSION >= 41 */ - - /* Increment the semaphore indicating that binning is done and - * unblocking the render thread. Note that this doesn't act - * until the FLUSH completes. - */ - cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr); - - /* The FLUSH_ALL emits any unwritten state changes in each - * tile. We can use this to reset any state that needs to be - * present at the start of the next tile, as we do with - * OCCLUSION_QUERY_COUNTER above. 
- */ - cl_emit(&job->bcl, FLUSH_ALL_STATE, flush); -} diff --git a/src/gallium/drivers/vc5/v3dx_simulator.c b/src/gallium/drivers/vc5/v3dx_simulator.c deleted file mode 100644 index 1162869be27..00000000000 --- a/src/gallium/drivers/vc5/v3dx_simulator.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file vc5_simulator_hw.c - * - * Implements the actual HW interaction betweeh the GL driver's VC5 simulator and the simulator. - * - * The register headers between V3D versions will have conflicting defines, so - * all register interactions appear in this file and are compiled per V3D version - * we support. 
- */ - -#ifdef USE_V3D_SIMULATOR - -#include "vc5_screen.h" -#include "vc5_context.h" -#include "vc5_simulator_wrapper.h" - -#define HW_REGISTER_RO(x) (x) -#define HW_REGISTER_RW(x) (x) -#if V3D_VERSION >= 41 -#include "libs/core/v3d/registers/4.1.34.0/v3d.h" -#else -#include "libs/core/v3d/registers/3.3.0.0/v3d.h" -#endif - -#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) -#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) - -static void -vc5_flush_l3(struct v3d_hw *v3d) -{ - if (!v3d_hw_has_gca(v3d)) - return; - -#if V3D_VERSION < 40 - uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); - - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); -#endif -} - -/* Invalidates the L2 cache. This is a read-only cache. */ -static void -vc5_flush_l2(struct v3d_hw *v3d) -{ - V3D_WRITE(V3D_CTL_0_L2CACTL, - V3D_CTL_0_L2CACTL_L2CCLR_SET | - V3D_CTL_0_L2CACTL_L2CENA_SET); -} - -/* Invalidates texture L2 cachelines */ -static void -vc5_flush_l2t(struct v3d_hw *v3d) -{ - V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); - V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); - V3D_WRITE(V3D_CTL_0_L2TCACTL, - V3D_CTL_0_L2TCACTL_L2TFLS_SET | - (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); -} - -/* Invalidates the slice caches. These are read-only caches. 
*/ -static void -vc5_flush_slices(struct v3d_hw *v3d) -{ - V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); -} - -static void -vc5_flush_caches(struct v3d_hw *v3d) -{ - vc5_flush_l3(v3d); - vc5_flush_l2(v3d); - vc5_flush_l2t(v3d); - vc5_flush_slices(v3d); -} - -int -v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, - struct drm_v3d_get_param *args) -{ - static const uint32_t reg_map[] = { - [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, - [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, - [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, - [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, - [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0, - [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1, - [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, - }; - - if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { - args->value = V3D_READ(reg_map[args->param]); - return 0; - } - - fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n", - (long long)args->value); - abort(); -} - -void -v3dX(simulator_init_regs)(struct v3d_hw *v3d) -{ -#if V3D_VERSION == 33 - /* Set OVRTMUOUT to match kernel behavior. - * - * This means that the texture sampler uniform configuration's tmu - * output type field is used, instead of using the hardware default - * behavior based on the texture type. If you want the default - * behavior, you can still put "2" in the indirect texture state's - * output_type field. - */ - V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); -#endif -} - -void -v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs) -{ - /* Completely reset the GMP. 
*/ - V3D_WRITE(V3D_GMP_0_CFG, - V3D_GMP_0_CFG_PROTENABLE_SET); - V3D_WRITE(V3D_GMP_0_TABLE_ADDR, gmp_ofs); - V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0); - while (V3D_READ(V3D_GMP_0_STATUS) & - V3D_GMP_0_STATUS_CFG_BUSY_SET) { - ; - } - - vc5_flush_caches(v3d); - - if (submit->qma) { - V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); - V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); - } -#if V3D_VERSION >= 41 - if (submit->qts) { - V3D_WRITE(V3D_CLE_0_CT0QTS, - V3D_CLE_0_CT0QTS_CTQTSEN_SET | - submit->qts); - } -#endif - V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); - V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); - - /* Wait for bin to complete before firing render, as it seems the - * simulator doesn't implement the semaphores. - */ - while (V3D_READ(V3D_CLE_0_CT0CA) != - V3D_READ(V3D_CLE_0_CT0EA)) { - v3d_hw_tick(v3d); - } - - V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); - V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); - - while (V3D_READ(V3D_CLE_0_CT1CA) != - V3D_READ(V3D_CLE_0_CT1EA) || - V3D_READ(V3D_CLE_1_CT1CA) != - V3D_READ(V3D_CLE_1_CT1EA)) { - v3d_hw_tick(v3d); - } -} - -#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/vc5/vc5_blit.c b/src/gallium/drivers/vc5/vc5_blit.c deleted file mode 100644 index 66f530723f2..00000000000 --- a/src/gallium/drivers/vc5/vc5_blit.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright © 2015-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_format.h" -#include "util/u_surface.h" -#include "util/u_blitter.h" -#include "vc5_context.h" - -#if 0 -static struct pipe_surface * -vc5_get_blit_surface(struct pipe_context *pctx, - struct pipe_resource *prsc, unsigned level) -{ - struct pipe_surface tmpl; - - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.format = prsc->format; - tmpl.u.tex.level = level; - tmpl.u.tex.first_layer = 0; - tmpl.u.tex.last_layer = 0; - - return pctx->create_surface(pctx, prsc, &tmpl); -} - -static bool -is_tile_unaligned(unsigned size, unsigned tile_size) -{ - return size & (tile_size - 1); -} - -static bool -vc5_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) -{ - struct vc5_context *vc5 = vc5_context(pctx); - bool msaa = (info->src.resource->nr_samples > 1 || - info->dst.resource->nr_samples > 1); - int tile_width = msaa ? 32 : 64; - int tile_height = msaa ? 
32 : 64; - - if (util_format_is_depth_or_stencil(info->dst.resource->format)) - return false; - - if (info->scissor_enable) - return false; - - if ((info->mask & PIPE_MASK_RGBA) == 0) - return false; - - if (info->dst.box.x != info->src.box.x || - info->dst.box.y != info->src.box.y || - info->dst.box.width != info->src.box.width || - info->dst.box.height != info->src.box.height) { - return false; - } - - int dst_surface_width = u_minify(info->dst.resource->width0, - info->dst.level); - int dst_surface_height = u_minify(info->dst.resource->height0, - info->dst.level); - if (is_tile_unaligned(info->dst.box.x, tile_width) || - is_tile_unaligned(info->dst.box.y, tile_height) || - (is_tile_unaligned(info->dst.box.width, tile_width) && - info->dst.box.x + info->dst.box.width != dst_surface_width) || - (is_tile_unaligned(info->dst.box.height, tile_height) && - info->dst.box.y + info->dst.box.height != dst_surface_height)) { - return false; - } - - /* VC5_PACKET_LOAD_TILE_BUFFER_GENERAL uses the - * VC5_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our - * destination surface) to determine the stride. This may be wrong - * when reading from texture miplevels > 0, which are stored in - * POT-sized areas. For MSAA, the tile addresses are computed - * explicitly by the RCL, but still use the destination width to - * determine the stride (which could be fixed by explicitly supplying - * it in the ABI). 
- */ - struct vc5_resource *rsc = vc5_resource(info->src.resource); - - uint32_t stride; - - if (info->src.resource->nr_samples > 1) - stride = align(dst_surface_width, 32) * 4 * rsc->cpp; - /* XXX else if (rsc->slices[info->src.level].tiling == VC5_TILING_FORMAT_T) - stride = align(dst_surface_width * rsc->cpp, 128); */ - else - stride = align(dst_surface_width * rsc->cpp, 16); - - if (stride != rsc->slices[info->src.level].stride) - return false; - - if (info->dst.resource->format != info->src.resource->format) - return false; - - if (false) { - fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", - info->src.box.x, - info->src.box.y, - info->dst.box.x, - info->dst.box.y, - info->dst.box.width, - info->dst.box.height); - } - - struct pipe_surface *dst_surf = - vc5_get_blit_surface(pctx, info->dst.resource, info->dst.level); - struct pipe_surface *src_surf = - vc5_get_blit_surface(pctx, info->src.resource, info->src.level); - - vc5_flush_jobs_reading_resource(vc5, info->src.resource); - - struct vc5_job *job = vc5_get_job(vc5, dst_surf, NULL); - pipe_surface_reference(&job->color_read, src_surf); - - /* If we're resolving from MSAA to single sample, we still need to run - * the engine in MSAA mode for the load. 
- */ - if (!job->msaa && info->src.resource->nr_samples > 1) { - job->msaa = true; - job->tile_width = 32; - job->tile_height = 32; - } - - job->draw_min_x = info->dst.box.x; - job->draw_min_y = info->dst.box.y; - job->draw_max_x = info->dst.box.x + info->dst.box.width; - job->draw_max_y = info->dst.box.y + info->dst.box.height; - job->draw_width = dst_surf->width; - job->draw_height = dst_surf->height; - - job->tile_width = tile_width; - job->tile_height = tile_height; - job->msaa = msaa; - job->needs_flush = true; - job->resolve |= PIPE_CLEAR_COLOR; - - vc5_job_submit(vc5, job); - - pipe_surface_reference(&dst_surf, NULL); - pipe_surface_reference(&src_surf, NULL); - - return true; -} -#endif - -void -vc5_blitter_save(struct vc5_context *vc5) -{ - util_blitter_save_fragment_constant_buffer_slot(vc5->blitter, - vc5->constbuf[PIPE_SHADER_FRAGMENT].cb); - util_blitter_save_vertex_buffer_slot(vc5->blitter, vc5->vertexbuf.vb); - util_blitter_save_vertex_elements(vc5->blitter, vc5->vtx); - util_blitter_save_vertex_shader(vc5->blitter, vc5->prog.bind_vs); - util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, - vc5->streamout.targets); - util_blitter_save_rasterizer(vc5->blitter, vc5->rasterizer); - util_blitter_save_viewport(vc5->blitter, &vc5->viewport); - util_blitter_save_scissor(vc5->blitter, &vc5->scissor); - util_blitter_save_fragment_shader(vc5->blitter, vc5->prog.bind_fs); - util_blitter_save_blend(vc5->blitter, vc5->blend); - util_blitter_save_depth_stencil_alpha(vc5->blitter, vc5->zsa); - util_blitter_save_stencil_ref(vc5->blitter, &vc5->stencil_ref); - util_blitter_save_sample_mask(vc5->blitter, vc5->sample_mask); - util_blitter_save_framebuffer(vc5->blitter, &vc5->framebuffer); - util_blitter_save_fragment_sampler_states(vc5->blitter, - vc5->fragtex.num_samplers, - (void **)vc5->fragtex.samplers); - util_blitter_save_fragment_sampler_views(vc5->blitter, - vc5->fragtex.num_textures, vc5->fragtex.textures); - 
util_blitter_save_so_targets(vc5->blitter, vc5->streamout.num_targets, - vc5->streamout.targets); -} - -static bool -vc5_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) -{ - struct vc5_context *vc5 = vc5_context(ctx); - - if (!util_blitter_is_blit_supported(vc5->blitter, info)) { - fprintf(stderr, "blit unsupported %s -> %s\n", - util_format_short_name(info->src.resource->format), - util_format_short_name(info->dst.resource->format)); - return false; - } - - vc5_blitter_save(vc5); - util_blitter_blit(vc5->blitter, info); - - return true; -} - -/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 - * or R8 texture. - */ -static void -vc5_stencil_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) -{ - struct vc5_context *vc5 = vc5_context(ctx); - struct vc5_resource *src = vc5_resource(info->src.resource); - struct vc5_resource *dst = vc5_resource(info->dst.resource); - enum pipe_format src_format, dst_format; - - if (src->separate_stencil) { - src = src->separate_stencil; - src_format = PIPE_FORMAT_R8_UNORM; - } else { - src_format = PIPE_FORMAT_RGBA8888_UNORM; - } - - if (dst->separate_stencil) { - dst = dst->separate_stencil; - dst_format = PIPE_FORMAT_R8_UNORM; - } else { - dst_format = PIPE_FORMAT_RGBA8888_UNORM; - } - - /* Initialize the surface. */ - struct pipe_surface dst_tmpl = { - .u.tex = { - .level = info->dst.level, - .first_layer = info->dst.box.z, - .last_layer = info->dst.box.z, - }, - .format = dst_format, - }; - struct pipe_surface *dst_surf = - ctx->create_surface(ctx, &dst->base, &dst_tmpl); - - /* Initialize the sampler view. */ - struct pipe_sampler_view src_tmpl = { - .target = src->base.target, - .format = src_format, - .u.tex = { - .first_level = info->src.level, - .last_level = info->src.level, - .first_layer = 0, - .last_layer = (PIPE_TEXTURE_3D ? 
- u_minify(src->base.depth0, - info->src.level) - 1 : - src->base.array_size - 1), - }, - .swizzle_r = PIPE_SWIZZLE_X, - .swizzle_g = PIPE_SWIZZLE_Y, - .swizzle_b = PIPE_SWIZZLE_Z, - .swizzle_a = PIPE_SWIZZLE_W, - }; - struct pipe_sampler_view *src_view = - ctx->create_sampler_view(ctx, &src->base, &src_tmpl); - - vc5_blitter_save(vc5); - util_blitter_blit_generic(vc5->blitter, dst_surf, &info->dst.box, - src_view, &info->src.box, - src->base.width0, src->base.height0, - PIPE_MASK_R, - PIPE_TEX_FILTER_NEAREST, - info->scissor_enable ? &info->scissor : NULL, - info->alpha_blend); - - pipe_surface_reference(&dst_surf, NULL); - pipe_sampler_view_reference(&src_view, NULL); -} - -/* Optimal hardware path for blitting pixels. - * Scaling, format conversion, up- and downsampling (resolve) are allowed. - */ -void -vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) -{ - struct pipe_blit_info info = *blit_info; - - if (info.mask & PIPE_MASK_S) { - vc5_stencil_blit(pctx, blit_info); - info.mask &= ~PIPE_MASK_S; - } - -#if 0 - if (vc5_tile_blit(pctx, blit_info)) - return; -#endif - - vc5_render_blit(pctx, &info); -} diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.c b/src/gallium/drivers/vc5/vc5_bufmgr.c deleted file mode 100644 index 2773df3cf2f..00000000000 --- a/src/gallium/drivers/vc5/vc5_bufmgr.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or 
substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "util/u_hash_table.h" -#include "util/u_memory.h" -#include "util/ralloc.h" - -#include "vc5_context.h" -#include "vc5_screen.h" - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#else -#define VG(x) -#endif - -static bool dump_stats = false; - -static void -vc5_bo_cache_free_all(struct vc5_bo_cache *cache); - -static void -vc5_bo_dump_stats(struct vc5_screen *screen) -{ - struct vc5_bo_cache *cache = &screen->bo_cache; - - fprintf(stderr, " BOs allocated: %d\n", screen->bo_count); - fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024); - fprintf(stderr, " BOs cached: %d\n", cache->bo_count); - fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024); - - if (!list_empty(&cache->time_list)) { - struct vc5_bo *first = LIST_ENTRY(struct vc5_bo, - cache->time_list.next, - time_list); - struct vc5_bo *last = LIST_ENTRY(struct vc5_bo, - cache->time_list.prev, - time_list); - - fprintf(stderr, " oldest cache time: %ld\n", - (long)first->free_time); - fprintf(stderr, " newest cache time: %ld\n", - (long)last->free_time); - - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - fprintf(stderr, " now: %ld\n", - time.tv_sec); - } -} - -static void -vc5_bo_remove_from_cache(struct vc5_bo_cache *cache, struct vc5_bo *bo) -{ - list_del(&bo->time_list); - list_del(&bo->size_list); - cache->bo_count--; - cache->bo_size -= bo->size; -} - 
-static struct vc5_bo * -vc5_bo_from_cache(struct vc5_screen *screen, uint32_t size, const char *name) -{ - struct vc5_bo_cache *cache = &screen->bo_cache; - uint32_t page_index = size / 4096 - 1; - - if (cache->size_list_size <= page_index) - return NULL; - - struct vc5_bo *bo = NULL; - mtx_lock(&cache->lock); - if (!list_empty(&cache->size_list[page_index])) { - bo = LIST_ENTRY(struct vc5_bo, cache->size_list[page_index].next, - size_list); - - /* Check that the BO has gone idle. If not, then we want to - * allocate something new instead, since we assume that the - * user will proceed to CPU map it and fill it with stuff. - */ - if (!vc5_bo_wait(bo, 0, NULL)) { - mtx_unlock(&cache->lock); - return NULL; - } - - pipe_reference_init(&bo->reference, 1); - vc5_bo_remove_from_cache(cache, bo); - - bo->name = name; - } - mtx_unlock(&cache->lock); - return bo; -} - -struct vc5_bo * -vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, const char *name) -{ - struct vc5_bo *bo; - int ret; - - size = align(size, 4096); - - bo = vc5_bo_from_cache(screen, size, name); - if (bo) { - if (dump_stats) { - fprintf(stderr, "Allocated %s %dkb from cache:\n", - name, size / 1024); - vc5_bo_dump_stats(screen); - } - return bo; - } - - bo = CALLOC_STRUCT(vc5_bo); - if (!bo) - return NULL; - - pipe_reference_init(&bo->reference, 1); - bo->screen = screen; - bo->size = size; - bo->name = name; - bo->private = true; - - retry: - ; - - bool cleared_and_retried = false; - struct drm_v3d_create_bo create = { - .size = size - }; - - ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_CREATE_BO, &create); - bo->handle = create.handle; - bo->offset = create.offset; - - if (ret != 0) { - if (!list_empty(&screen->bo_cache.time_list) && - !cleared_and_retried) { - cleared_and_retried = true; - vc5_bo_cache_free_all(&screen->bo_cache); - goto retry; - } - - free(bo); - return NULL; - } - - screen->bo_count++; - screen->bo_size += bo->size; - if (dump_stats) { - fprintf(stderr, "Allocated %s %dkb:\n", 
name, size / 1024); - vc5_bo_dump_stats(screen); - } - - return bo; -} - -void -vc5_bo_last_unreference(struct vc5_bo *bo) -{ - struct vc5_screen *screen = bo->screen; - - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - mtx_lock(&screen->bo_cache.lock); - vc5_bo_last_unreference_locked_timed(bo, time.tv_sec); - mtx_unlock(&screen->bo_cache.lock); -} - -static void -vc5_bo_free(struct vc5_bo *bo) -{ - struct vc5_screen *screen = bo->screen; - - if (bo->map) { - if (using_vc5_simulator && bo->name && - strcmp(bo->name, "winsys") == 0) { - free(bo->map); - } else { - munmap(bo->map, bo->size); - VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); - } - } - - struct drm_gem_close c; - memset(&c, 0, sizeof(c)); - c.handle = bo->handle; - int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); - if (ret != 0) - fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); - - screen->bo_count--; - screen->bo_size -= bo->size; - - if (dump_stats) { - fprintf(stderr, "Freed %s%s%dkb:\n", - bo->name ? bo->name : "", - bo->name ? " " : "", - bo->size / 1024); - vc5_bo_dump_stats(screen); - } - - free(bo); -} - -static void -free_stale_bos(struct vc5_screen *screen, time_t time) -{ - struct vc5_bo_cache *cache = &screen->bo_cache; - bool freed_any = false; - - list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, - time_list) { - if (dump_stats && !freed_any) { - fprintf(stderr, "Freeing stale BOs:\n"); - vc5_bo_dump_stats(screen); - freed_any = true; - } - - /* If it's more than a second old, free it. 
*/ - if (time - bo->free_time > 2) { - vc5_bo_remove_from_cache(cache, bo); - vc5_bo_free(bo); - } else { - break; - } - } - - if (dump_stats && freed_any) { - fprintf(stderr, "Freed stale BOs:\n"); - vc5_bo_dump_stats(screen); - } -} - -static void -vc5_bo_cache_free_all(struct vc5_bo_cache *cache) -{ - mtx_lock(&cache->lock); - list_for_each_entry_safe(struct vc5_bo, bo, &cache->time_list, - time_list) { - vc5_bo_remove_from_cache(cache, bo); - vc5_bo_free(bo); - } - mtx_unlock(&cache->lock); -} - -void -vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time) -{ - struct vc5_screen *screen = bo->screen; - struct vc5_bo_cache *cache = &screen->bo_cache; - uint32_t page_index = bo->size / 4096 - 1; - - if (!bo->private) { - vc5_bo_free(bo); - return; - } - - if (cache->size_list_size <= page_index) { - struct list_head *new_list = - ralloc_array(screen, struct list_head, page_index + 1); - - /* Move old list contents over (since the array has moved, and - * therefore the pointers to the list heads have to change). 
- */ - for (int i = 0; i < cache->size_list_size; i++) { - struct list_head *old_head = &cache->size_list[i]; - if (list_empty(old_head)) - list_inithead(&new_list[i]); - else { - new_list[i].next = old_head->next; - new_list[i].prev = old_head->prev; - new_list[i].next->prev = &new_list[i]; - new_list[i].prev->next = &new_list[i]; - } - } - for (int i = cache->size_list_size; i < page_index + 1; i++) - list_inithead(&new_list[i]); - - cache->size_list = new_list; - cache->size_list_size = page_index + 1; - } - - bo->free_time = time; - list_addtail(&bo->size_list, &cache->size_list[page_index]); - list_addtail(&bo->time_list, &cache->time_list); - cache->bo_count++; - cache->bo_size += bo->size; - if (dump_stats) { - fprintf(stderr, "Freed %s %dkb to cache:\n", - bo->name, bo->size / 1024); - vc5_bo_dump_stats(screen); - } - bo->name = NULL; - - free_stale_bos(screen, time); -} - -static struct vc5_bo * -vc5_bo_open_handle(struct vc5_screen *screen, - uint32_t winsys_stride, - uint32_t handle, uint32_t size) -{ - struct vc5_bo *bo; - - assert(size); - - mtx_lock(&screen->bo_handles_mutex); - - bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); - if (bo) { - pipe_reference(NULL, &bo->reference); - goto done; - } - - bo = CALLOC_STRUCT(vc5_bo); - pipe_reference_init(&bo->reference, 1); - bo->screen = screen; - bo->handle = handle; - bo->size = size; - bo->name = "winsys"; - bo->private = false; - -#ifdef USE_V3D_SIMULATOR - vc5_simulator_open_from_handle(screen->fd, winsys_stride, - bo->handle, bo->size); - bo->map = malloc(bo->size); -#endif - - struct drm_v3d_get_bo_offset get = { - .handle = handle, - }; - int ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get); - if (ret) { - fprintf(stderr, "Failed to get BO offset: %s\n", - strerror(errno)); - free(bo->map); - free(bo); - return NULL; - } - bo->offset = get.offset; - assert(bo->offset != 0); - - util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); - -done: - 
mtx_unlock(&screen->bo_handles_mutex); - return bo; -} - -struct vc5_bo * -vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, - uint32_t winsys_stride) -{ - struct drm_gem_open o = { - .name = name - }; - int ret = vc5_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o); - if (ret) { - fprintf(stderr, "Failed to open bo %d: %s\n", - name, strerror(errno)); - return NULL; - } - - return vc5_bo_open_handle(screen, winsys_stride, o.handle, o.size); -} - -struct vc5_bo * -vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, uint32_t winsys_stride) -{ - uint32_t handle; - int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); - int size; - if (ret) { - fprintf(stderr, "Failed to get vc5 handle for dmabuf %d\n", fd); - return NULL; - } - - /* Determine the size of the bo we were handed. */ - size = lseek(fd, 0, SEEK_END); - if (size == -1) { - fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd); - return NULL; - } - - return vc5_bo_open_handle(screen, winsys_stride, handle, size); -} - -int -vc5_bo_get_dmabuf(struct vc5_bo *bo) -{ - int fd; - int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle, - O_CLOEXEC, &fd); - if (ret != 0) { - fprintf(stderr, "Failed to export gem bo %d to dmabuf\n", - bo->handle); - return -1; - } - - mtx_lock(&bo->screen->bo_handles_mutex); - bo->private = false; - util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); - mtx_unlock(&bo->screen->bo_handles_mutex); - - return fd; -} - -bool -vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) -{ - struct drm_gem_flink flink = { - .handle = bo->handle, - }; - int ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret) { - fprintf(stderr, "Failed to flink bo %d: %s\n", - bo->handle, strerror(errno)); - free(bo); - return false; - } - - bo->private = false; - *name = flink.name; - - return true; -} - -static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) -{ - struct drm_v3d_wait_bo wait = { - .handle = handle, - .timeout_ns = 
timeout_ns, - }; - int ret = vc5_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait); - if (ret == -1) - return -errno; - else - return 0; - -} - -bool -vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason) -{ - struct vc5_screen *screen = bo->screen; - - if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { - if (vc5_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) { - fprintf(stderr, "Blocking on %s BO for %s\n", - bo->name, reason); - } - } - - int ret = vc5_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns); - if (ret) { - if (ret != -ETIME) { - fprintf(stderr, "wait failed: %d\n", ret); - abort(); - } - - return false; - } - - return true; -} - -void * -vc5_bo_map_unsynchronized(struct vc5_bo *bo) -{ - uint64_t offset; - int ret; - - if (bo->map) - return bo->map; - - struct drm_v3d_mmap_bo map; - memset(&map, 0, sizeof(map)); - map.handle = bo->handle; - ret = vc5_ioctl(bo->screen->fd, DRM_IOCTL_V3D_MMAP_BO, &map); - offset = map.offset; - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->screen->fd, offset); - if (bo->map == MAP_FAILED) { - fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - bo->handle, (long long)offset, bo->size); - abort(); - } - VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false)); - - return bo->map; -} - -void * -vc5_bo_map(struct vc5_bo *bo) -{ - void *map = vc5_bo_map_unsynchronized(bo); - - bool ok = vc5_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map"); - if (!ok) { - fprintf(stderr, "BO wait for map failed\n"); - abort(); - } - - return map; -} - -void -vc5_bufmgr_destroy(struct pipe_screen *pscreen) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_bo_cache *cache = &screen->bo_cache; - - vc5_bo_cache_free_all(cache); - - if (dump_stats) { - fprintf(stderr, "BO stats after screen destroy:\n"); - vc5_bo_dump_stats(screen); - } -} diff --git 
a/src/gallium/drivers/vc5/vc5_bufmgr.h b/src/gallium/drivers/vc5/vc5_bufmgr.h deleted file mode 100644 index cca2b22874f..00000000000 --- a/src/gallium/drivers/vc5/vc5_bufmgr.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef VC5_BUFMGR_H -#define VC5_BUFMGR_H - -#include -#include "util/u_hash_table.h" -#include "util/u_inlines.h" -#include "util/list.h" -#include "vc5_screen.h" - -struct vc5_context; - -struct vc5_bo { - struct pipe_reference reference; - struct vc5_screen *screen; - void *map; - const char *name; - uint32_t handle; - uint32_t size; - - /* Address of the BO in our page tables. */ - uint32_t offset; - - /** Entry in the linked list of buffers freed, by age. */ - struct list_head time_list; - /** Entry in the per-page-count linked list of buffers freed (by age). 
*/ - struct list_head size_list; - /** Approximate second when the bo was freed. */ - time_t free_time; - /** - * Whether only our process has a reference to the BO (meaning that - * it's safe to reuse it in the BO cache). - */ - bool private; -}; - -struct vc5_bo *vc5_bo_alloc(struct vc5_screen *screen, uint32_t size, - const char *name); -void vc5_bo_last_unreference(struct vc5_bo *bo); -void vc5_bo_last_unreference_locked_timed(struct vc5_bo *bo, time_t time); -struct vc5_bo *vc5_bo_open_name(struct vc5_screen *screen, uint32_t name, - uint32_t winsys_stride); -struct vc5_bo *vc5_bo_open_dmabuf(struct vc5_screen *screen, int fd, - uint32_t winsys_stride); -bool vc5_bo_flink(struct vc5_bo *bo, uint32_t *name); -int vc5_bo_get_dmabuf(struct vc5_bo *bo); - -static inline void -vc5_bo_set_reference(struct vc5_bo **old_bo, struct vc5_bo *new_bo) -{ - if (pipe_reference(&(*old_bo)->reference, &new_bo->reference)) - vc5_bo_last_unreference(*old_bo); - *old_bo = new_bo; -} - -static inline struct vc5_bo * -vc5_bo_reference(struct vc5_bo *bo) -{ - pipe_reference(NULL, &bo->reference); - return bo; -} - -static inline void -vc5_bo_unreference(struct vc5_bo **bo) -{ - struct vc5_screen *screen; - if (!*bo) - return; - - if ((*bo)->private) { - /* Avoid the mutex for private BOs */ - if (pipe_reference(&(*bo)->reference, NULL)) - vc5_bo_last_unreference(*bo); - } else { - screen = (*bo)->screen; - mtx_lock(&screen->bo_handles_mutex); - - if (pipe_reference(&(*bo)->reference, NULL)) { - util_hash_table_remove(screen->bo_handles, - (void *)(uintptr_t)(*bo)->handle); - vc5_bo_last_unreference(*bo); - } - - mtx_unlock(&screen->bo_handles_mutex); - } - - *bo = NULL; -} - -static inline void -vc5_bo_unreference_locked_timed(struct vc5_bo **bo, time_t time) -{ - if (!*bo) - return; - - if (pipe_reference(&(*bo)->reference, NULL)) - vc5_bo_last_unreference_locked_timed(*bo, time); - *bo = NULL; -} - -void * -vc5_bo_map(struct vc5_bo *bo); - -void * -vc5_bo_map_unsynchronized(struct 
vc5_bo *bo); - -bool -vc5_bo_wait(struct vc5_bo *bo, uint64_t timeout_ns, const char *reason); - -bool -vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, - const char *reason); - -void -vc5_bufmgr_destroy(struct pipe_screen *pscreen); - -#endif /* VC5_BUFMGR_H */ - diff --git a/src/gallium/drivers/vc5/vc5_cl.c b/src/gallium/drivers/vc5/vc5_cl.c deleted file mode 100644 index a10c1649e0d..00000000000 --- a/src/gallium/drivers/vc5/vc5_cl.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_math.h" -#include "util/ralloc.h" -#include "vc5_context.h" -/* The branching packets are the same across V3D versions. 
*/ -#define V3D_VERSION 33 -#include "broadcom/common/v3d_macros.h" -#include "broadcom/cle/v3dx_pack.h" - -void -vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl) -{ - cl->base = NULL; - cl->next = cl->base; - cl->size = 0; - cl->job = job; -} - -uint32_t -vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t space, uint32_t alignment) -{ - uint32_t offset = align(cl_offset(cl), alignment); - - if (offset + space <= cl->size) { - cl->next = cl->base + offset; - return offset; - } - - vc5_bo_unreference(&cl->bo); - cl->bo = vc5_bo_alloc(cl->job->vc5->screen, align(space, 4096), "CL"); - cl->base = vc5_bo_map(cl->bo); - cl->size = cl->bo->size; - cl->next = cl->base; - - return 0; -} - -void -vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t space) -{ - if (cl_offset(cl) + space + cl_packet_length(BRANCH) <= cl->size) - return; - - struct vc5_bo *new_bo = vc5_bo_alloc(cl->job->vc5->screen, 4096, "CL"); - assert(space <= new_bo->size); - - /* Chain to the new BO from the old one. */ - if (cl->bo) { - cl_emit(cl, BRANCH, branch) { - branch.address = cl_address(new_bo, 0); - } - vc5_bo_unreference(&cl->bo); - } else { - /* Root the first RCL/BCL BO in the job. 
*/ - vc5_job_add_bo(cl->job, cl->bo); - } - - cl->bo = new_bo; - cl->base = vc5_bo_map(cl->bo); - cl->size = cl->bo->size; - cl->next = cl->base; -} - -void -vc5_destroy_cl(struct vc5_cl *cl) -{ - vc5_bo_unreference(&cl->bo); -} diff --git a/src/gallium/drivers/vc5/vc5_cl.h b/src/gallium/drivers/vc5/vc5_cl.h deleted file mode 100644 index 7025b5a672b..00000000000 --- a/src/gallium/drivers/vc5/vc5_cl.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef VC5_CL_H -#define VC5_CL_H - -#include - -#include "util/u_math.h" -#include "util/macros.h" - -struct vc5_bo; -struct vc5_job; -struct vc5_cl; - -/** - * Undefined structure, used for typechecking that you're passing the pointers - * to these functions correctly. 
- */ -struct vc5_cl_out; - -/** A reference to a BO used in the CL packing functions */ -struct vc5_cl_reloc { - struct vc5_bo *bo; - uint32_t offset; -}; - -static inline void cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *); - -#define __gen_user_data struct vc5_cl -#define __gen_address_type struct vc5_cl_reloc -#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ - (reloc)->offset) -#define __gen_emit_reloc cl_pack_emit_reloc - -struct vc5_cl { - void *base; - struct vc5_job *job; - struct vc5_cl_out *next; - struct vc5_bo *bo; - uint32_t size; -}; - -void vc5_init_cl(struct vc5_job *job, struct vc5_cl *cl); -void vc5_destroy_cl(struct vc5_cl *cl); -void vc5_dump_cl(void *cl, uint32_t size, bool is_render); -uint32_t vc5_gem_hindex(struct vc5_job *job, struct vc5_bo *bo); - -struct PACKED unaligned_16 { uint16_t x; }; -struct PACKED unaligned_32 { uint32_t x; }; - -static inline uint32_t cl_offset(struct vc5_cl *cl) -{ - return (char *)cl->next - (char *)cl->base; -} - -static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl) -{ - return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) }; -} - -static inline void -cl_advance(struct vc5_cl_out **cl, uint32_t n) -{ - (*cl) = (struct vc5_cl_out *)((char *)(*cl) + n); -} - -static inline struct vc5_cl_out * -cl_start(struct vc5_cl *cl) -{ - return cl->next; -} - -static inline void -cl_end(struct vc5_cl *cl, struct vc5_cl_out *next) -{ - cl->next = next; - assert(cl_offset(cl) <= cl->size); -} - - -static inline void -put_unaligned_32(struct vc5_cl_out *ptr, uint32_t val) -{ - struct unaligned_32 *p = (void *)ptr; - p->x = val; -} - -static inline void -put_unaligned_16(struct vc5_cl_out *ptr, uint16_t val) -{ - struct unaligned_16 *p = (void *)ptr; - p->x = val; -} - -static inline void -cl_u8(struct vc5_cl_out **cl, uint8_t n) -{ - *(uint8_t *)(*cl) = n; - cl_advance(cl, 1); -} - -static inline void -cl_u16(struct vc5_cl_out **cl, uint16_t n) -{ 
- put_unaligned_16(*cl, n); - cl_advance(cl, 2); -} - -static inline void -cl_u32(struct vc5_cl_out **cl, uint32_t n) -{ - put_unaligned_32(*cl, n); - cl_advance(cl, 4); -} - -static inline void -cl_aligned_u32(struct vc5_cl_out **cl, uint32_t n) -{ - *(uint32_t *)(*cl) = n; - cl_advance(cl, 4); -} - -static inline void -cl_aligned_reloc(struct vc5_cl *cl, - struct vc5_cl_out **cl_out, - struct vc5_bo *bo, uint32_t offset) -{ - cl_aligned_u32(cl_out, bo->offset + offset); - vc5_job_add_bo(cl->job, bo); -} - -static inline void -cl_ptr(struct vc5_cl_out **cl, void *ptr) -{ - *(struct vc5_cl_out **)(*cl) = ptr; - cl_advance(cl, sizeof(void *)); -} - -static inline void -cl_f(struct vc5_cl_out **cl, float f) -{ - cl_u32(cl, fui(f)); -} - -static inline void -cl_aligned_f(struct vc5_cl_out **cl, float f) -{ - cl_aligned_u32(cl, fui(f)); -} - -/** - * Reference to a BO with its associated offset, used in the pack process. - */ -static inline struct vc5_cl_reloc -cl_address(struct vc5_bo *bo, uint32_t offset) -{ - struct vc5_cl_reloc reloc = { - .bo = bo, - .offset = offset, - }; - return reloc; -} - -uint32_t vc5_cl_ensure_space(struct vc5_cl *cl, uint32_t size, uint32_t align); -void vc5_cl_ensure_space_with_branch(struct vc5_cl *cl, uint32_t size); - -#define cl_packet_header(packet) V3DX(packet ## _header) -#define cl_packet_length(packet) V3DX(packet ## _length) -#define cl_packet_pack(packet) V3DX(packet ## _pack) -#define cl_packet_struct(packet) V3DX(packet) - -static inline void * -cl_get_emit_space(struct vc5_cl_out **cl, size_t size) -{ - void *addr = *cl; - cl_advance(cl, size); - return addr; -} - -/* Macro for setting up an emit of a CL struct. 
A temporary unpacked struct - * is created, which you get to set fields in of the form: - * - * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) { - * .flags.flat_shade_flags = 1 << 2, - * } - * - * or default values only can be emitted with just: - * - * cl_emit(bcl, FLAT_SHADE_FLAGS, flags); - * - * The trick here is that we make a for loop that will execute the body - * (either the block or the ';' after the macro invocation) exactly once. - */ -#define cl_emit(cl, packet, name) \ - for (struct cl_packet_struct(packet) name = { \ - cl_packet_header(packet) \ - }, \ - *_loop_terminate = &name; \ - __builtin_expect(_loop_terminate != NULL, 1); \ - ({ \ - struct vc5_cl_out *cl_out = cl_start(cl); \ - cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ - cl_advance(&cl_out, cl_packet_length(packet)); \ - cl_end(cl, cl_out); \ - _loop_terminate = NULL; \ - })) \ - -#define cl_emit_with_prepacked(cl, packet, prepacked, name) \ - for (struct cl_packet_struct(packet) name = { \ - cl_packet_header(packet) \ - }, \ - *_loop_terminate = &name; \ - __builtin_expect(_loop_terminate != NULL, 1); \ - ({ \ - struct vc5_cl_out *cl_out = cl_start(cl); \ - uint8_t packed[cl_packet_length(packet)]; \ - cl_packet_pack(packet)(cl, packed, &name); \ - for (int _i = 0; _i < cl_packet_length(packet); _i++) \ - ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \ - cl_advance(&cl_out, cl_packet_length(packet)); \ - cl_end(cl, cl_out); \ - _loop_terminate = NULL; \ - })) \ - -#define cl_emit_prepacked(cl, packet) do { \ - memcpy((cl)->next, packet, sizeof(*packet)); \ - cl_advance(&(cl)->next, sizeof(*packet)); \ -} while (0) - -#define v3dx_pack(packed, packet, name) \ - for (struct cl_packet_struct(packet) name = { \ - cl_packet_header(packet) \ - }, \ - *_loop_terminate = &name; \ - __builtin_expect(_loop_terminate != NULL, 1); \ - ({ \ - cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \ - cl_packet_length(packet))); \ - 
_loop_terminate = NULL; \ - })) \ - -/** - * Helper function called by the XML-generated pack functions for filling in - * an address field in shader records. - * - * Since we have a private address space as of VC5, our BOs can have lifelong - * offsets, and all the kernel needs to know is which BOs need to be paged in - * for this exec. - */ -static inline void -cl_pack_emit_reloc(struct vc5_cl *cl, const struct vc5_cl_reloc *reloc) -{ - if (reloc->bo) - vc5_job_add_bo(cl->job, reloc->bo); -} - -#endif /* VC5_CL_H */ diff --git a/src/gallium/drivers/vc5/vc5_context.c b/src/gallium/drivers/vc5/vc5_context.c deleted file mode 100644 index b6d1234879b..00000000000 --- a/src/gallium/drivers/vc5/vc5_context.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include - -#include "pipe/p_defines.h" -#include "util/hash_table.h" -#include "util/ralloc.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_blitter.h" -#include "util/u_upload_mgr.h" -#include "indices/u_primconvert.h" -#include "pipe/p_screen.h" - -#include "vc5_screen.h" -#include "vc5_context.h" -#include "vc5_resource.h" - -void -vc5_flush(struct pipe_context *pctx) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - struct hash_entry *entry; - hash_table_foreach(vc5->jobs, entry) { - struct vc5_job *job = entry->data; - vc5_job_submit(vc5, job); - } -} - -static void -vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, - unsigned flags) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - vc5_flush(pctx); - - if (fence) { - struct pipe_screen *screen = pctx->screen; - struct vc5_fence *f = vc5_fence_create(vc5); - screen->fence_reference(screen, fence, NULL); - *fence = (struct pipe_fence_handle *)f; - } -} - -static void -vc5_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_resource *rsc = vc5_resource(prsc); - - rsc->initialized_buffers = 0; - - struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, - prsc); - if (!entry) - return; - - struct vc5_job *job = entry->data; - if (job->key.zsbuf && job->key.zsbuf->texture == prsc) - job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); -} - -static void -vc5_context_destroy(struct pipe_context *pctx) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - vc5_flush(pctx); - - if (vc5->blitter) - util_blitter_destroy(vc5->blitter); - - if (vc5->primconvert) - util_primconvert_destroy(vc5->primconvert); - - if (vc5->uploader) - u_upload_destroy(vc5->uploader); - - slab_destroy_child(&vc5->transfer_pool); - - pipe_surface_reference(&vc5->framebuffer.cbufs[0], NULL); - pipe_surface_reference(&vc5->framebuffer.zsbuf, NULL); - - 
vc5_program_fini(pctx); - - ralloc_free(vc5); -} - -struct pipe_context * -vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_context *vc5; - - /* Prevent dumping of the shaders built during context setup. */ - uint32_t saved_shaderdb_flag = V3D_DEBUG & V3D_DEBUG_SHADERDB; - V3D_DEBUG &= ~V3D_DEBUG_SHADERDB; - - vc5 = rzalloc(NULL, struct vc5_context); - if (!vc5) - return NULL; - struct pipe_context *pctx = &vc5->base; - - vc5->screen = screen; - - int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, - &vc5->out_sync); - if (ret) { - ralloc_free(vc5); - return NULL; - } - - pctx->screen = pscreen; - pctx->priv = priv; - pctx->destroy = vc5_context_destroy; - pctx->flush = vc5_pipe_flush; - pctx->invalidate_resource = vc5_invalidate_resource; - - if (screen->devinfo.ver >= 41) { - v3d41_draw_init(pctx); - v3d41_state_init(pctx); - } else { - v3d33_draw_init(pctx); - v3d33_state_init(pctx); - } - vc5_program_init(pctx); - vc5_query_init(pctx); - vc5_resource_context_init(pctx); - - vc5_job_init(vc5); - - vc5->fd = screen->fd; - - slab_create_child(&vc5->transfer_pool, &screen->transfer_pool); - - vc5->uploader = u_upload_create_default(&vc5->base); - vc5->base.stream_uploader = vc5->uploader; - vc5->base.const_uploader = vc5->uploader; - - vc5->blitter = util_blitter_create(pctx); - if (!vc5->blitter) - goto fail; - - vc5->primconvert = util_primconvert_create(pctx, - (1 << PIPE_PRIM_QUADS) - 1); - if (!vc5->primconvert) - goto fail; - - V3D_DEBUG |= saved_shaderdb_flag; - - vc5->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; - vc5->active_queries = true; - - return &vc5->base; - -fail: - pctx->destroy(pctx); - return NULL; -} diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h deleted file mode 100644 index 5df02b3907d..00000000000 --- a/src/gallium/drivers/vc5/vc5_context.h +++ /dev/null @@ -1,565 +0,0 @@ -/* - * Copyright © 
2014-2017 Broadcom - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef VC5_CONTEXT_H -#define VC5_CONTEXT_H - -#ifdef V3D_VERSION -#include "broadcom/common/v3d_macros.h" -#endif - -#include - -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/bitset.h" -#include "util/slab.h" -#include "xf86drm.h" -#include "v3d_drm.h" -#include "vc5_screen.h" - -struct vc5_job; -struct vc5_bo; -void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); - -#include "vc5_bufmgr.h" -#include "vc5_resource.h" -#include "vc5_cl.h" - -#ifdef USE_V3D_SIMULATOR -#define using_vc5_simulator true -#else -#define using_vc5_simulator false -#endif - -#define VC5_DIRTY_BLEND (1 << 0) -#define VC5_DIRTY_RASTERIZER (1 << 1) -#define VC5_DIRTY_ZSA (1 << 2) -#define VC5_DIRTY_FRAGTEX (1 << 3) -#define VC5_DIRTY_VERTTEX (1 << 4) - -#define VC5_DIRTY_BLEND_COLOR (1 << 7) -#define VC5_DIRTY_STENCIL_REF (1 << 8) -#define VC5_DIRTY_SAMPLE_MASK (1 << 9) -#define VC5_DIRTY_FRAMEBUFFER (1 << 10) -#define VC5_DIRTY_STIPPLE (1 << 11) -#define VC5_DIRTY_VIEWPORT (1 << 12) -#define VC5_DIRTY_CONSTBUF (1 << 13) -#define VC5_DIRTY_VTXSTATE (1 << 14) -#define VC5_DIRTY_VTXBUF (1 << 15) -#define VC5_DIRTY_SCISSOR (1 << 17) -#define VC5_DIRTY_FLAT_SHADE_FLAGS (1 << 18) -#define VC5_DIRTY_PRIM_MODE (1 << 19) -#define VC5_DIRTY_CLIP (1 << 20) -#define VC5_DIRTY_UNCOMPILED_VS (1 << 21) -#define VC5_DIRTY_UNCOMPILED_FS (1 << 22) -#define VC5_DIRTY_COMPILED_CS (1 << 23) -#define VC5_DIRTY_COMPILED_VS (1 << 24) -#define VC5_DIRTY_COMPILED_FS (1 << 25) -#define VC5_DIRTY_FS_INPUTS (1 << 26) -#define VC5_DIRTY_STREAMOUT (1 << 27) -#define VC5_DIRTY_OQ (1 << 28) -#define VC5_DIRTY_CENTROID_FLAGS (1 << 29) - -#define VC5_MAX_FS_INPUTS 64 - -struct vc5_sampler_view { - struct pipe_sampler_view base; - uint32_t p0; - uint32_t p1; - /* Precomputed swizzles to pass in to the shader key. */ - uint8_t swizzle[4]; - - uint8_t texture_shader_state[32]; - /* V3D 4.x: Texture state struct. 
*/ - struct vc5_bo *bo; -}; - -struct vc5_sampler_state { - struct pipe_sampler_state base; - uint32_t p0; - uint32_t p1; - - /* V3D 3.x: Packed texture state. */ - uint8_t texture_shader_state[32]; - /* V3D 4.x: Sampler state struct. */ - struct vc5_bo *bo; -}; - -struct vc5_texture_stateobj { - struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; - unsigned num_textures; - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - unsigned num_samplers; - struct vc5_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; -}; - -struct vc5_shader_uniform_info { - enum quniform_contents *contents; - uint32_t *data; - uint32_t count; -}; - -struct vc5_uncompiled_shader { - /** A name for this program, so you can track it in shader-db output. */ - uint32_t program_id; - /** How many variants of this program were compiled, for shader-db. */ - uint32_t compiled_variant_count; - struct pipe_shader_state base; - uint32_t num_tf_outputs; - struct v3d_varying_slot *tf_outputs; - uint16_t tf_specs[16]; - uint16_t tf_specs_psiz[16]; - uint32_t num_tf_specs; - - /** - * Flag for if the NIR in this shader originally came from TGSI. If - * so, we need to do some fixups at compile time, due to missing - * information in TGSI that exists in NIR. - */ - bool was_tgsi; -}; - -struct vc5_compiled_shader { - struct vc5_bo *bo; - - union { - struct v3d_prog_data *base; - struct v3d_vs_prog_data *vs; - struct v3d_fs_prog_data *fs; - } prog_data; - - /** - * VC5_DIRTY_* flags that, when set in vc5->dirty, mean that the - * uniforms have to be rewritten (and therefore the shader state - * reemitted). 
- */ - uint32_t uniform_dirty_bits; -}; - -struct vc5_program_stateobj { - struct vc5_uncompiled_shader *bind_vs, *bind_fs; - struct vc5_compiled_shader *cs, *vs, *fs; - - struct vc5_bo *spill_bo; - int spill_size_per_thread; -}; - -struct vc5_constbuf_stateobj { - struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; - uint32_t enabled_mask; - uint32_t dirty_mask; -}; - -struct vc5_vertexbuf_stateobj { - struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; - unsigned count; - uint32_t enabled_mask; - uint32_t dirty_mask; -}; - -struct vc5_vertex_stateobj { - struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; - unsigned num_elements; - - uint8_t attrs[12 * VC5_MAX_ATTRIBUTES]; - struct vc5_bo *default_attribute_values; -}; - -struct vc5_streamout_stateobj { - struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; - unsigned num_targets; -}; - -/* Hash table key for vc5->jobs */ -struct vc5_job_key { - struct pipe_surface *cbufs[4]; - struct pipe_surface *zsbuf; -}; - -enum vc5_ez_state { - VC5_EZ_UNDECIDED = 0, - VC5_EZ_GT_GE, - VC5_EZ_LT_LE, - VC5_EZ_DISABLED, -}; - -/** - * A complete bin/render job. - * - * This is all of the state necessary to submit a bin/render to the kernel. - * We want to be able to have multiple in progress at a time, so that we don't - * need to flush an existing CL just to switch to rendering to a new render - * target (which would mean reading back from the old render target when - * starting to render to it again). - */ -struct vc5_job { - struct vc5_context *vc5; - struct vc5_cl bcl; - struct vc5_cl rcl; - struct vc5_cl indirect; - struct vc5_bo *tile_alloc; - struct vc5_bo *tile_state; - uint32_t shader_rec_count; - - struct drm_v3d_submit_cl submit; - - /** - * Set of all BOs referenced by the job. This will be used for making - * the list of BOs that the kernel will need to have paged in to - * execute our job. - */ - struct set *bos; - - /** Sum of the sizes of the BOs referenced by the job. 
*/ - uint32_t referenced_size; - - struct set *write_prscs; - - /* Size of the submit.bo_handles array. */ - uint32_t bo_handles_size; - - /** @{ Surfaces to submit rendering for. */ - struct pipe_surface *cbufs[4]; - struct pipe_surface *zsbuf; - /** @} */ - /** @{ - * Bounding box of the scissor across all queued drawing. - * - * Note that the max values are exclusive. - */ - uint32_t draw_min_x; - uint32_t draw_min_y; - uint32_t draw_max_x; - uint32_t draw_max_y; - /** @} */ - /** @{ - * Width/height of the color framebuffer being rendered to, - * for VC5_TILE_RENDERING_MODE_CONFIG. - */ - uint32_t draw_width; - uint32_t draw_height; - /** @} */ - /** @{ Tile information, depending on MSAA and float color buffer. */ - uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */ - uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */ - - uint32_t tile_width; /** @< Width of a tile. */ - uint32_t tile_height; /** @< Height of a tile. */ - /** maximum internal_bpp of all color render targets. */ - uint32_t internal_bpp; - - /** Whether the current rendering is in a 4X MSAA tile buffer. */ - bool msaa; - /** @} */ - - /* Bitmask of PIPE_CLEAR_* of buffers that were cleared before the - * first rendering. - */ - uint32_t cleared; - /* Bitmask of PIPE_CLEAR_* of buffers that have been rendered to - * (either clears or draws). - */ - uint32_t resolve; - uint32_t clear_color[4][4]; - float clear_z; - uint8_t clear_s; - - /** - * Set if some drawing (triangles, blits, or just a glClear()) has - * been done to the FBO, meaning that we need to - * DRM_IOCTL_VC5_SUBMIT_CL. - */ - bool needs_flush; - - /** - * Set if there is a nonzero address for OCCLUSION_QUERY_COUNTER. If - * so, we need to disable it and flush before ending the CL, to keep - * the next tile from starting with it enabled. - */ - bool oq_enabled; - - /** - * Set when a packet enabling TF on all further primitives has been - * emitted. 
- */ - bool tf_enabled; - - /** - * Current EZ state for drawing. Updated at the start of draw after - * we've decided on the shader being rendered. - */ - enum vc5_ez_state ez_state; - /** - * The first EZ state that was used for drawing with a decided EZ - * direction (so either UNDECIDED, GT, or LT). - */ - enum vc5_ez_state first_ez_state; - - /** - * Number of draw calls (not counting full buffer clears) queued in - * the current job. - */ - uint32_t draw_calls_queued; - - struct vc5_job_key key; -}; - -struct vc5_context { - struct pipe_context base; - - int fd; - struct vc5_screen *screen; - - /** The 3D rendering job for the currently bound FBO. */ - struct vc5_job *job; - - /* Map from struct vc5_job_key to the job for that FBO. - */ - struct hash_table *jobs; - - /** - * Map from vc5_resource to a job writing to that resource. - * - * Primarily for flushing jobs rendering to textures that are now - * being read from. - */ - struct hash_table *write_jobs; - - struct slab_child_pool transfer_pool; - struct blitter_context *blitter; - - /** bitfield of VC5_DIRTY_* */ - uint32_t dirty; - - struct primconvert_context *primconvert; - - struct hash_table *fs_cache, *vs_cache; - uint32_t next_uncompiled_program_id; - uint64_t next_compiled_program_id; - - struct vc5_compiler_state *compiler_state; - - uint8_t prim_mode; - - /** Maximum index buffer valid for the current shader_rec. */ - uint32_t max_index; - - /** Sync object that our RCL will update as its out_sync. 
*/ - uint32_t out_sync; - - struct u_upload_mgr *uploader; - - /** @{ Current pipeline state objects */ - struct pipe_scissor_state scissor; - struct pipe_blend_state *blend; - struct vc5_rasterizer_state *rasterizer; - struct vc5_depth_stencil_alpha_state *zsa; - - struct vc5_texture_stateobj verttex, fragtex; - - struct vc5_program_stateobj prog; - - struct vc5_vertex_stateobj *vtx; - - struct { - struct pipe_blend_color f; - uint16_t hf[4]; - } blend_color; - struct pipe_stencil_ref stencil_ref; - unsigned sample_mask; - struct pipe_framebuffer_state framebuffer; - - /* Per render target, whether we should swap the R and B fields in the - * shader's color output and in blending. If render targets disagree - * on the R/B swap and use the constant color, then we would need to - * fall back to in-shader blending. - */ - uint8_t swap_color_rb; - - /* Per render target, whether we should treat the dst alpha values as - * one in blending. - * - * For RGBX formats, the tile buffer's alpha channel will be - * undefined. 
- */ - uint8_t blend_dst_alpha_one; - - bool active_queries; - - uint32_t tf_prims_generated; - uint32_t prims_generated; - - struct pipe_poly_stipple stipple; - struct pipe_clip_state clip; - struct pipe_viewport_state viewport; - struct vc5_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; - struct vc5_vertexbuf_stateobj vertexbuf; - struct vc5_streamout_stateobj streamout; - struct vc5_bo *current_oq; - /** @} */ -}; - -struct vc5_rasterizer_state { - struct pipe_rasterizer_state base; - - /* VC5_CONFIGURATION_BITS */ - uint8_t config_bits[3]; - - float point_size; - - /** - * Half-float (1/8/7 bits) value of polygon offset units for - * VC5_PACKET_DEPTH_OFFSET - */ - uint16_t offset_units; - /** - * Half-float (1/8/7 bits) value of polygon offset scale for - * VC5_PACKET_DEPTH_OFFSET - */ - uint16_t offset_factor; -}; - -struct vc5_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state base; - - enum vc5_ez_state ez_state; - - /** Uniforms for stencil state. - * - * Index 0 is either the front config, or the front-and-back config. - * Index 1 is the back config if doing separate back stencil. - * Index 2 is the writemask config if it's not a common mask value. - */ - uint32_t stencil_uniforms[3]; - - uint8_t stencil_front[6]; - uint8_t stencil_back[6]; -}; - -#define perf_debug(...) 
do { \ - if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ - fprintf(stderr, __VA_ARGS__); \ -} while (0) - -static inline struct vc5_context * -vc5_context(struct pipe_context *pcontext) -{ - return (struct vc5_context *)pcontext; -} - -static inline struct vc5_sampler_view * -vc5_sampler_view(struct pipe_sampler_view *psview) -{ - return (struct vc5_sampler_view *)psview; -} - -static inline struct vc5_sampler_state * -vc5_sampler_state(struct pipe_sampler_state *psampler) -{ - return (struct vc5_sampler_state *)psampler; -} - -struct pipe_context *vc5_context_create(struct pipe_screen *pscreen, - void *priv, unsigned flags); -void vc5_program_init(struct pipe_context *pctx); -void vc5_program_fini(struct pipe_context *pctx); -void vc5_query_init(struct pipe_context *pctx); - -void vc5_simulator_init(struct vc5_screen *screen); -void vc5_simulator_destroy(struct vc5_screen *screen); -int vc5_simulator_flush(struct vc5_context *vc5, - struct drm_v3d_submit_cl *args, - struct vc5_job *job); -int vc5_simulator_ioctl(int fd, unsigned long request, void *arg); -void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size); - -static inline int -vc5_ioctl(int fd, unsigned long request, void *arg) -{ - if (using_vc5_simulator) - return vc5_simulator_ioctl(fd, request, arg); - else - return drmIoctl(fd, request, arg); -} - -void vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader); -struct vc5_cl_reloc vc5_write_uniforms(struct vc5_context *vc5, - struct vc5_compiled_shader *shader, - struct vc5_constbuf_stateobj *cb, - struct vc5_texture_stateobj *texstate); - -void vc5_flush(struct pipe_context *pctx); -void vc5_job_init(struct vc5_context *vc5); -struct vc5_job *vc5_get_job(struct vc5_context *vc5, - struct pipe_surface **cbufs, - struct pipe_surface *zsbuf); -struct vc5_job *vc5_get_job_for_fbo(struct vc5_context *vc5); -void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo); -void vc5_job_add_write_resource(struct 
vc5_job *job, struct pipe_resource *prsc); -void vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job); -void vc5_flush_jobs_writing_resource(struct vc5_context *vc5, - struct pipe_resource *prsc); -void vc5_flush_jobs_reading_resource(struct vc5_context *vc5, - struct pipe_resource *prsc); -void vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode); - -bool vc5_rt_format_supported(const struct v3d_device_info *devinfo, - enum pipe_format f); -bool vc5_tex_format_supported(const struct v3d_device_info *devinfo, - enum pipe_format f); -uint8_t vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f); -uint8_t vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f); -uint8_t vc5_get_tex_return_size(const struct v3d_device_info *devinfo, - enum pipe_format f, - enum pipe_tex_compare compare); -uint8_t vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, - enum pipe_format f); -const uint8_t *vc5_get_format_swizzle(const struct v3d_device_info *devinfo, - enum pipe_format f); -void vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, - uint32_t format, - uint32_t *type, - uint32_t *bpp); - -void vc5_init_query_functions(struct vc5_context *vc5); -void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); -void vc5_blitter_save(struct vc5_context *vc5); - -struct vc5_fence *vc5_fence_create(struct vc5_context *vc5); - -#ifdef v3dX -# include "v3dx_context.h" -#else -# define v3dX(x) v3d33_##x -# include "v3dx_context.h" -# undef v3dX - -# define v3dX(x) v3d41_##x -# include "v3dx_context.h" -# undef v3dX -#endif - -#endif /* VC5_CONTEXT_H */ diff --git a/src/gallium/drivers/vc5/vc5_draw.c b/src/gallium/drivers/vc5/vc5_draw.c deleted file mode 100644 index ecb1aa37831..00000000000 --- a/src/gallium/drivers/vc5/vc5_draw.c +++ /dev/null @@ -1,714 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of 
charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_blitter.h" -#include "util/u_prim.h" -#include "util/u_format.h" -#include "util/u_pack_color.h" -#include "util/u_prim_restart.h" -#include "util/u_upload_mgr.h" -#include "indices/u_primconvert.h" - -#include "vc5_context.h" -#include "vc5_resource.h" -#include "vc5_cl.h" -#include "broadcom/compiler/v3d_compiler.h" -#include "broadcom/common/v3d_macros.h" -#include "broadcom/cle/v3dx_pack.h" - -/** - * Does the initial bining command list setup for drawing to a given FBO. - */ -static void -vc5_start_draw(struct vc5_context *vc5) -{ - struct vc5_job *job = vc5->job; - - if (job->needs_flush) - return; - - /* Get space to emit our BCL state, using a branch to jump to a new BO - * if necessary. 
- */ - vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); - - job->submit.bcl_start = job->bcl.bo->offset; - vc5_job_add_bo(job, job->bcl.bo); - - job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc"); - uint32_t tsda_per_tile_size = vc5->screen->devinfo.ver >= 40 ? 256 : 64; - job->tile_state = vc5_bo_alloc(vc5->screen, - job->draw_tiles_y * - job->draw_tiles_x * - tsda_per_tile_size, - "TSDA"); - -#if V3D_VERSION < 40 - /* "Binning mode lists start with a Tile Binning Mode Configuration - * item (120)" - * - * Part1 signals the end of binning config setup. - */ - cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) { - config.tile_allocation_memory_address = - cl_address(job->tile_alloc, 0); - config.tile_allocation_memory_size = job->tile_alloc->size; - } -#endif - - cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) { -#if V3D_VERSION >= 40 - config.width_in_pixels_minus_1 = vc5->framebuffer.width - 1; - config.height_in_pixels_minus_1 = vc5->framebuffer.height - 1; - config.number_of_render_targets_minus_1 = - MAX2(vc5->framebuffer.nr_cbufs, 1) - 1; -#else /* V3D_VERSION < 40 */ - config.tile_state_data_array_base_address = - cl_address(job->tile_state, 0); - - config.width_in_tiles = job->draw_tiles_x; - config.height_in_tiles = job->draw_tiles_y; - /* Must be >= 1 */ - config.number_of_render_targets = - MAX2(vc5->framebuffer.nr_cbufs, 1); -#endif /* V3D_VERSION < 40 */ - - config.multisample_mode_4x = job->msaa; - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } - - /* There's definitely nothing in the VCD cache we want. */ - cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); - - /* Disable any leftover OQ state from another job. */ - cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter); - - /* "Binning mode lists must have a Start Tile Binning item (6) after - * any prefix state data before the binning list proper starts." 
- */ - cl_emit(&job->bcl, START_TILE_BINNING, bin); - - job->needs_flush = true; - job->draw_width = vc5->framebuffer.width; - job->draw_height = vc5->framebuffer.height; -} - -static void -vc5_predraw_check_textures(struct pipe_context *pctx, - struct vc5_texture_stateobj *stage_tex) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - for (int i = 0; i < stage_tex->num_textures; i++) { - struct pipe_sampler_view *view = stage_tex->textures[i]; - if (!view) - continue; - - vc5_flush_jobs_writing_resource(vc5, view->texture); - } -} - -static void -vc5_emit_gl_shader_state(struct vc5_context *vc5, - const struct pipe_draw_info *info) -{ - struct vc5_job *job = vc5->job; - /* VC5_DIRTY_VTXSTATE */ - struct vc5_vertex_stateobj *vtx = vc5->vtx; - /* VC5_DIRTY_VTXBUF */ - struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf; - - /* Upload the uniforms to the indirect CL first */ - struct vc5_cl_reloc fs_uniforms = - vc5_write_uniforms(vc5, vc5->prog.fs, - &vc5->constbuf[PIPE_SHADER_FRAGMENT], - &vc5->fragtex); - struct vc5_cl_reloc vs_uniforms = - vc5_write_uniforms(vc5, vc5->prog.vs, - &vc5->constbuf[PIPE_SHADER_VERTEX], - &vc5->verttex); - struct vc5_cl_reloc cs_uniforms = - vc5_write_uniforms(vc5, vc5->prog.cs, - &vc5->constbuf[PIPE_SHADER_VERTEX], - &vc5->verttex); - - /* See GFXH-930 workaround below */ - uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); - uint32_t shader_rec_offset = - vc5_cl_ensure_space(&job->indirect, - cl_packet_length(GL_SHADER_STATE_RECORD) + - num_elements_to_emit * - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), - 32); - - cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { - shader.enable_clipping = true; - /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ - shader.point_size_in_shaded_vertex_data = - (info->mode == PIPE_PRIM_POINTS && - vc5->rasterizer->base.point_size_per_vertex); - - /* Must be set if the shader modifies Z, discards, or modifies - * the sample mask. 
For any of these cases, the fragment - * shader needs to write the Z value (even just discards). - */ - shader.fragment_shader_does_z_writes = - (vc5->prog.fs->prog_data.fs->writes_z || - vc5->prog.fs->prog_data.fs->discard); - - shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = - vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w; - - shader.number_of_varyings_in_fragment_shader = - vc5->prog.fs->prog_data.base->num_inputs; - - shader.propagate_nans = true; - - shader.coordinate_shader_code_address = - cl_address(vc5->prog.cs->bo, 0); - shader.vertex_shader_code_address = - cl_address(vc5->prog.vs->bo, 0); - shader.fragment_shader_code_address = - cl_address(vc5->prog.fs->bo, 0); - - /* XXX: Use combined input/output size flag in the common - * case. - */ - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; - shader.coordinate_shader_input_vpm_segment_size = - MAX2(vc5->prog.cs->prog_data.vs->vpm_input_size, 1); - shader.vertex_shader_input_vpm_segment_size = - MAX2(vc5->prog.vs->prog_data.vs->vpm_input_size, 1); - - shader.coordinate_shader_output_vpm_segment_size = - vc5->prog.cs->prog_data.vs->vpm_output_size; - shader.vertex_shader_output_vpm_segment_size = - vc5->prog.vs->prog_data.vs->vpm_output_size; - - shader.coordinate_shader_uniforms_address = cs_uniforms; - shader.vertex_shader_uniforms_address = vs_uniforms; - shader.fragment_shader_uniforms_address = fs_uniforms; - -#if V3D_VERSION >= 41 - shader.coordinate_shader_4_way_threadable = - vc5->prog.cs->prog_data.vs->base.threads == 4; - shader.vertex_shader_4_way_threadable = - vc5->prog.vs->prog_data.vs->base.threads == 4; - shader.fragment_shader_4_way_threadable = - vc5->prog.fs->prog_data.fs->base.threads == 4; - - shader.coordinate_shader_start_in_final_thread_section = - vc5->prog.cs->prog_data.vs->base.single_seg; - shader.vertex_shader_start_in_final_thread_section = - 
vc5->prog.vs->prog_data.vs->base.single_seg; - shader.fragment_shader_start_in_final_thread_section = - vc5->prog.fs->prog_data.fs->base.single_seg; -#else - shader.coordinate_shader_4_way_threadable = - vc5->prog.cs->prog_data.vs->base.threads == 4; - shader.coordinate_shader_2_way_threadable = - vc5->prog.cs->prog_data.vs->base.threads == 2; - shader.vertex_shader_4_way_threadable = - vc5->prog.vs->prog_data.vs->base.threads == 4; - shader.vertex_shader_2_way_threadable = - vc5->prog.vs->prog_data.vs->base.threads == 2; - shader.fragment_shader_4_way_threadable = - vc5->prog.fs->prog_data.fs->base.threads == 4; - shader.fragment_shader_2_way_threadable = - vc5->prog.fs->prog_data.fs->base.threads == 2; -#endif - - shader.vertex_id_read_by_coordinate_shader = - vc5->prog.cs->prog_data.vs->uses_vid; - shader.instance_id_read_by_coordinate_shader = - vc5->prog.cs->prog_data.vs->uses_iid; - shader.vertex_id_read_by_vertex_shader = - vc5->prog.vs->prog_data.vs->uses_vid; - shader.instance_id_read_by_vertex_shader = - vc5->prog.vs->prog_data.vs->uses_iid; - - shader.address_of_default_attribute_values = - cl_address(vtx->default_attribute_values, 0); - } - - for (int i = 0; i < vtx->num_elements; i++) { - struct pipe_vertex_element *elem = &vtx->pipe[i]; - struct pipe_vertex_buffer *vb = - &vertexbuf->vb[elem->vertex_buffer_index]; - struct vc5_resource *rsc = vc5_resource(vb->buffer.resource); - - const uint32_t size = - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - cl_emit_with_prepacked(&job->indirect, - GL_SHADER_STATE_ATTRIBUTE_RECORD, - &vtx->attrs[i * size], attr) { - attr.stride = vb->stride; - attr.address = cl_address(rsc->bo, - vb->buffer_offset + - elem->src_offset); - attr.number_of_values_read_by_coordinate_shader = - vc5->prog.cs->prog_data.vs->vattr_sizes[i]; - attr.number_of_values_read_by_vertex_shader = - vc5->prog.vs->prog_data.vs->vattr_sizes[i]; -#if V3D_VERSION >= 41 - attr.maximum_index = 0xffffff; -#endif - } - } - - if 
(vtx->num_elements == 0) { - /* GFXH-930: At least one attribute must be enabled and read - * by CS and VS. If we have no attributes being consumed by - * the shader, set up a dummy to be loaded into the VPM. - */ - cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { - /* Valid address of data whose value will be unused. */ - attr.address = cl_address(job->indirect.bo, 0); - - attr.type = ATTRIBUTE_FLOAT; - attr.stride = 0; - attr.vec_size = 1; - - attr.number_of_values_read_by_coordinate_shader = 1; - attr.number_of_values_read_by_vertex_shader = 1; - } - } - - cl_emit(&job->bcl, GL_SHADER_STATE, state) { - state.address = cl_address(job->indirect.bo, shader_rec_offset); - state.number_of_attribute_arrays = num_elements_to_emit; - } - - vc5_bo_unreference(&cs_uniforms.bo); - vc5_bo_unreference(&vs_uniforms.bo); - vc5_bo_unreference(&fs_uniforms.bo); - - job->shader_rec_count++; -} - -/** - * Computes the various transform feedback statistics, since they can't be - * recorded by CL packets. - */ -static void -vc5_tf_statistics_record(struct vc5_context *vc5, - const struct pipe_draw_info *info, - bool prim_tf) -{ - if (!vc5->active_queries) - return; - - uint32_t prims = u_prims_for_vertices(info->mode, info->count); - vc5->prims_generated += prims; - - if (prim_tf) { - /* XXX: Only count if we didn't overflow. */ - vc5->tf_prims_generated += prims; - } -} - -static void -vc5_update_job_ez(struct vc5_context *vc5, struct vc5_job *job) -{ - switch (vc5->zsa->ez_state) { - case VC5_EZ_UNDECIDED: - /* If the Z/S state didn't pick a direction but didn't - * disable, then go along with the current EZ state. This - * allows EZ optimization for Z func == EQUAL or NEVER. - */ - break; - - case VC5_EZ_LT_LE: - case VC5_EZ_GT_GE: - /* If the Z/S state picked a direction, then it needs to match - * the current direction if we've decided on one. 
- */ - if (job->ez_state == VC5_EZ_UNDECIDED) - job->ez_state = vc5->zsa->ez_state; - else if (job->ez_state != vc5->zsa->ez_state) - job->ez_state = VC5_EZ_DISABLED; - break; - - case VC5_EZ_DISABLED: - /* If the current Z/S state disables EZ because of a bad Z - * func or stencil operation, then we can't do any more EZ in - * this frame. - */ - job->ez_state = VC5_EZ_DISABLED; - break; - } - - /* If the FS affects the Z of the pixels, then it may update against - * the chosen EZ direction (though we could use - * ARB_conservative_depth's hints to avoid this) - */ - if (vc5->prog.fs->prog_data.fs->writes_z) { - job->ez_state = VC5_EZ_DISABLED; - } - - if (job->first_ez_state == VC5_EZ_UNDECIDED) - job->first_ez_state = job->ez_state; -} - -static void -vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - if (!info->count_from_stream_output && !info->indirect && - !info->primitive_restart && - !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) - return; - - /* Fall back for weird desktop GL primitive restart values. */ - if (info->primitive_restart && - info->index_size) { - uint32_t mask = ~0; - - switch (info->index_size) { - case 2: - mask = 0xffff; - break; - case 1: - mask = 0xff; - break; - } - - if (info->restart_index != mask) { - util_draw_vbo_without_prim_restart(pctx, info); - return; - } - } - - if (info->mode >= PIPE_PRIM_QUADS) { - util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base); - util_primconvert_draw_vbo(vc5->primconvert, info); - perf_debug("Fallback conversion for %d %s vertices\n", - info->count, u_prim_name(info->mode)); - return; - } - - /* Before setting up the draw, flush anything writing to the textures - * that we read from. 
- */ - vc5_predraw_check_textures(pctx, &vc5->verttex); - vc5_predraw_check_textures(pctx, &vc5->fragtex); - - struct vc5_job *job = vc5_get_job_for_fbo(vc5); - - /* Get space to emit our draw call into the BCL, using a branch to - * jump to a new BO if necessary. - */ - vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */); - - if (vc5->prim_mode != info->mode) { - vc5->prim_mode = info->mode; - vc5->dirty |= VC5_DIRTY_PRIM_MODE; - } - - vc5_start_draw(vc5); - vc5_update_compiled_shaders(vc5, info->mode); - vc5_update_job_ez(vc5, job); - -#if V3D_VERSION >= 41 - v3d41_emit_state(pctx); -#else - v3d33_emit_state(pctx); -#endif - - if (vc5->dirty & (VC5_DIRTY_VTXBUF | - VC5_DIRTY_VTXSTATE | - VC5_DIRTY_PRIM_MODE | - VC5_DIRTY_RASTERIZER | - VC5_DIRTY_COMPILED_CS | - VC5_DIRTY_COMPILED_VS | - VC5_DIRTY_COMPILED_FS | - vc5->prog.cs->uniform_dirty_bits | - vc5->prog.vs->uniform_dirty_bits | - vc5->prog.fs->uniform_dirty_bits)) { - vc5_emit_gl_shader_state(vc5, info); - } - - vc5->dirty = 0; - - /* The Base Vertex/Base Instance packet sets those values to nonzero - * for the next draw call only. - */ - if (info->index_bias || info->start_instance) { - cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { - base.base_instance = info->start_instance; - base.base_vertex = info->index_bias; - } - } - - uint32_t prim_tf_enable = 0; -#if V3D_VERSION < 40 - /* V3D 3.x: The HW only processes transform feedback on primitives - * with the flag set. - */ - if (vc5->streamout.num_targets) - prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); -#endif - - vc5_tf_statistics_record(vc5, info, vc5->streamout.num_targets); - - /* Note that the primitive type fields match with OpenGL/gallium - * definitions, up to but not including QUADS. 
- */ - if (info->index_size) { - uint32_t index_size = info->index_size; - uint32_t offset = info->start * index_size; - struct pipe_resource *prsc; - if (info->has_user_indices) { - prsc = NULL; - u_upload_data(vc5->uploader, 0, - info->count * info->index_size, 4, - info->index.user, - &offset, &prsc); - } else { - prsc = info->index.resource; - } - struct vc5_resource *rsc = vc5_resource(prsc); - -#if V3D_VERSION >= 40 - cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { - ib.address = cl_address(rsc->bo, 0); - ib.size = rsc->bo->size; - } -#endif - - if (info->instance_count > 1) { - cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) { - prim.index_type = ffs(info->index_size) - 1; -#if V3D_VERSION >= 40 - prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ - prim.mode = info->mode | prim_tf_enable; - prim.enable_primitive_restarts = info->primitive_restart; - - prim.number_of_instances = info->instance_count; - prim.instance_length = info->count; - } - } else { - cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) { - prim.index_type = ffs(info->index_size) - 1; - prim.length = info->count; -#if V3D_VERSION >= 40 - prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ - prim.mode = info->mode | prim_tf_enable; - prim.enable_primitive_restarts = info->primitive_restart; - } - } - - job->draw_calls_queued++; - - if (info->has_user_indices) - pipe_resource_reference(&prsc, NULL); - } else { - if (info->instance_count > 1) { - cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) { - prim.mode = info->mode | prim_tf_enable; - prim.index_of_first_vertex = info->start; - prim.number_of_instances = info->instance_count; - prim.instance_length = info->count; - } - } else 
{ - cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) { - prim.mode = info->mode | prim_tf_enable; - prim.length = info->count; - prim.index_of_first_vertex = info->start; - } - } - } - job->draw_calls_queued++; - - if (vc5->zsa && job->zsbuf && - (vc5->zsa->base.depth.enabled || - vc5->zsa->base.stencil[0].enabled)) { - struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); - vc5_job_add_bo(job, rsc->bo); - - if (vc5->zsa->base.depth.enabled) { - job->resolve |= PIPE_CLEAR_DEPTH; - rsc->initialized_buffers = PIPE_CLEAR_DEPTH; - } - - if (vc5->zsa->base.stencil[0].enabled) { - job->resolve |= PIPE_CLEAR_STENCIL; - rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; - } - } - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - uint32_t bit = PIPE_CLEAR_COLOR0 << i; - - if (job->resolve & bit || !job->cbufs[i]) - continue; - struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture); - - job->resolve |= bit; - vc5_job_add_bo(job, rsc->bo); - } - - if (job->referenced_size > 768 * 1024 * 1024) { - perf_debug("Flushing job with %dkb to try to free up memory\n", - job->referenced_size / 1024); - vc5_flush(pctx); - } - - if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) - vc5_flush(pctx); -} - -static void -vc5_clear(struct pipe_context *pctx, unsigned buffers, - const union pipe_color_union *color, double depth, unsigned stencil) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_job *job = vc5_get_job_for_fbo(vc5); - - /* We can't flag new buffers for clearing once we've queued draws. We - * could avoid this by using the 3d engine to clear. 
- */ - if (job->draw_calls_queued) { - perf_debug("Flushing rendering to process new clear.\n"); - vc5_job_submit(vc5, job); - job = vc5_get_job_for_fbo(vc5); - } - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - uint32_t bit = PIPE_CLEAR_COLOR0 << i; - if (!(buffers & bit)) - continue; - - struct pipe_surface *psurf = vc5->framebuffer.cbufs[i]; - struct vc5_surface *surf = vc5_surface(psurf); - struct vc5_resource *rsc = vc5_resource(psurf->texture); - - union util_color uc; - uint32_t internal_size = 4 << surf->internal_bpp; - - static union pipe_color_union swapped_color; - if (vc5->swap_color_rb & (1 << i)) { - swapped_color.f[0] = color->f[2]; - swapped_color.f[1] = color->f[1]; - swapped_color.f[2] = color->f[0]; - swapped_color.f[3] = color->f[3]; - color = &swapped_color; - } - - switch (surf->internal_type) { - case V3D_INTERNAL_TYPE_8: - util_pack_color(color->f, PIPE_FORMAT_R8G8B8A8_UNORM, - &uc); - memcpy(job->clear_color[i], uc.ui, internal_size); - break; - case V3D_INTERNAL_TYPE_8I: - case V3D_INTERNAL_TYPE_8UI: - job->clear_color[i][0] = ((color->ui[0] & 0xff) | - (color->ui[1] & 0xff) << 8 | - (color->ui[2] & 0xff) << 16 | - (color->ui[3] & 0xff) << 24); - break; - case V3D_INTERNAL_TYPE_16F: - util_pack_color(color->f, PIPE_FORMAT_R16G16B16A16_FLOAT, - &uc); - memcpy(job->clear_color[i], uc.ui, internal_size); - break; - case V3D_INTERNAL_TYPE_16I: - case V3D_INTERNAL_TYPE_16UI: - job->clear_color[i][0] = ((color->ui[0] & 0xffff) | - color->ui[1] << 16); - job->clear_color[i][1] = ((color->ui[2] & 0xffff) | - color->ui[3] << 16); - break; - case V3D_INTERNAL_TYPE_32F: - case V3D_INTERNAL_TYPE_32I: - case V3D_INTERNAL_TYPE_32UI: - memcpy(job->clear_color[i], color->ui, internal_size); - break; - } - - rsc->initialized_buffers |= bit; - } - - unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; - if (zsclear) { - struct vc5_resource *rsc = - vc5_resource(vc5->framebuffer.zsbuf->texture); - - if (zsclear & PIPE_CLEAR_DEPTH) - job->clear_z = 
depth; - if (zsclear & PIPE_CLEAR_STENCIL) - job->clear_s = stencil; - - rsc->initialized_buffers |= zsclear; - } - - job->draw_min_x = 0; - job->draw_min_y = 0; - job->draw_max_x = vc5->framebuffer.width; - job->draw_max_y = vc5->framebuffer.height; - job->cleared |= buffers; - job->resolve |= buffers; - - vc5_start_draw(vc5); -} - -static void -vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, - const union pipe_color_union *color, - unsigned x, unsigned y, unsigned w, unsigned h, - bool render_condition_enabled) -{ - fprintf(stderr, "unimpl: clear RT\n"); -} - -static void -vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, - unsigned buffers, double depth, unsigned stencil, - unsigned x, unsigned y, unsigned w, unsigned h, - bool render_condition_enabled) -{ - fprintf(stderr, "unimpl: clear DS\n"); -} - -void -v3dX(draw_init)(struct pipe_context *pctx) -{ - pctx->draw_vbo = vc5_draw_vbo; - pctx->clear = vc5_clear; - pctx->clear_render_target = vc5_clear_render_target; - pctx->clear_depth_stencil = vc5_clear_depth_stencil; -} diff --git a/src/gallium/drivers/vc5/vc5_emit.c b/src/gallium/drivers/vc5/vc5_emit.c deleted file mode 100644 index cb8af953300..00000000000 --- a/src/gallium/drivers/vc5/vc5_emit.c +++ /dev/null @@ -1,722 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_format.h" -#include "util/u_half.h" -#include "vc5_context.h" -#include "broadcom/common/v3d_macros.h" -#include "broadcom/cle/v3dx_pack.h" -#include "broadcom/compiler/v3d_compiler.h" - -static uint8_t -vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one) -{ - /* We may get a bad blendfactor when blending is disabled. */ - if (factor == 0) - return V3D_BLEND_FACTOR_ZERO; - - switch (factor) { - case PIPE_BLENDFACTOR_ZERO: - return V3D_BLEND_FACTOR_ZERO; - case PIPE_BLENDFACTOR_ONE: - return V3D_BLEND_FACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return V3D_BLEND_FACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return V3D_BLEND_FACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_DST_COLOR: - return V3D_BLEND_FACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return V3D_BLEND_FACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return V3D_BLEND_FACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return V3D_BLEND_FACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return (dst_alpha_one ? - V3D_BLEND_FACTOR_ONE : - V3D_BLEND_FACTOR_DST_ALPHA); - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return (dst_alpha_one ? 
- V3D_BLEND_FACTOR_ZERO : - V3D_BLEND_FACTOR_INV_DST_ALPHA); - case PIPE_BLENDFACTOR_CONST_COLOR: - return V3D_BLEND_FACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return V3D_BLEND_FACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return V3D_BLEND_FACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return V3D_BLEND_FACTOR_INV_CONST_ALPHA; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; - default: - unreachable("Bad blend factor"); - } -} - -static inline uint16_t -swizzled_border_color(const struct v3d_device_info *devinfo, - struct pipe_sampler_state *sampler, - struct vc5_sampler_view *sview, - int chan) -{ - const struct util_format_description *desc = - util_format_description(sview->base.format); - uint8_t swiz = chan; - - /* If we're doing swizzling in the sampler, then only rearrange the - * border color for the mismatch between the VC5 texture format and - * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by - * the sampler's swizzle. - * - * For swizzling in the shader, we don't do any pre-swizzling of the - * border color. 
- */ - if (vc5_get_tex_return_size(devinfo, sview->base.format, - sampler->compare_mode) != 32) - swiz = desc->swizzle[swiz]; - - switch (swiz) { - case PIPE_SWIZZLE_0: - return util_float_to_half(0.0); - case PIPE_SWIZZLE_1: - return util_float_to_half(1.0); - default: - return util_float_to_half(sampler->border_color.f[swiz]); - } -} - -#if V3D_VERSION < 40 -static uint32_t -translate_swizzle(unsigned char pipe_swizzle) -{ - switch (pipe_swizzle) { - case PIPE_SWIZZLE_0: - return 0; - case PIPE_SWIZZLE_1: - return 1; - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - case PIPE_SWIZZLE_W: - return 2 + pipe_swizzle; - default: - unreachable("unknown swizzle"); - } -} - -static void -emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, - int i) -{ - struct vc5_job *job = vc5->job; - struct pipe_sampler_state *psampler = stage_tex->samplers[i]; - struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); - struct pipe_sampler_view *psview = stage_tex->textures[i]; - struct vc5_sampler_view *sview = vc5_sampler_view(psview); - struct pipe_resource *prsc = psview->texture; - struct vc5_resource *rsc = vc5_resource(prsc); - const struct v3d_device_info *devinfo = &vc5->screen->devinfo; - - stage_tex->texture_state[i].offset = - vc5_cl_ensure_space(&job->indirect, - cl_packet_length(TEXTURE_SHADER_STATE), - 32); - vc5_bo_set_reference(&stage_tex->texture_state[i].bo, - job->indirect.bo); - - uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format, - psampler->compare_mode); - - struct V3D33_TEXTURE_SHADER_STATE unpacked = { - /* XXX */ - .border_color_red = swizzled_border_color(devinfo, psampler, - sview, 0), - .border_color_green = swizzled_border_color(devinfo, psampler, - sview, 1), - .border_color_blue = swizzled_border_color(devinfo, psampler, - sview, 2), - .border_color_alpha = swizzled_border_color(devinfo, psampler, - sview, 3), - - /* In the normal texturing path, the LOD gets clamped 
between - * min/max, and the base_level field (set in the sampler view - * from first_level) only decides where the min/mag switch - * happens, so we need to use the LOD clamps to keep us - * between min and max. - * - * For txf, the LOD clamp is still used, despite GL not - * wanting that. We will need to have a separate - * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to - * support txf properly. - */ - .min_level_of_detail = MIN2(psview->u.tex.first_level + - MAX2(psampler->min_lod, 0), - psview->u.tex.last_level), - .max_level_of_detail = MIN2(psview->u.tex.first_level + - psampler->max_lod, - psview->u.tex.last_level), - - .texture_base_pointer = cl_address(rsc->bo, - rsc->slices[0].offset), - - .output_32_bit = return_size == 32, - }; - - /* Set up the sampler swizzle if we're doing 16-bit sampling. For - * 32-bit, we leave swizzling up to the shader compiler. - * - * Note: Contrary to the docs, the swizzle still applies even if the - * return size is 32. It's just that you probably want to swizzle in - * the shader, because you need the Y/Z/W channels to be defined. 
- */ - if (return_size == 32) { - unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); - unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); - unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); - unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); - } else { - unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]); - unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]); - unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]); - unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]); - } - - int min_img_filter = psampler->min_img_filter; - int min_mip_filter = psampler->min_mip_filter; - int mag_img_filter = psampler->mag_img_filter; - - if (return_size == 32) { - min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - mag_img_filter = PIPE_TEX_FILTER_NEAREST; - mag_img_filter = PIPE_TEX_FILTER_NEAREST; - } - - bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; - switch (min_mip_filter) { - case PIPE_TEX_MIPFILTER_NONE: - unpacked.filter += min_nearest ? 2 : 0; - break; - case PIPE_TEX_MIPFILTER_NEAREST: - unpacked.filter += min_nearest ? 4 : 8; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - unpacked.filter += min_nearest ? 4 : 8; - unpacked.filter += 2; - break; - } - - if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) - unpacked.filter++; - - if (psampler->max_anisotropy > 8) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; - else if (psampler->max_anisotropy > 4) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; - else if (psampler->max_anisotropy > 2) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; - else if (psampler->max_anisotropy) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; - - uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; - cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); - - for (int i = 0; i < ARRAY_SIZE(packed); i++) - packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; - - /* TMU indirect structs need to be 32b aligned. 
*/ - vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); - cl_emit_prepacked(&job->indirect, &packed); -} - -static void -emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) -{ - for (int i = 0; i < stage_tex->num_textures; i++) { - if (stage_tex->textures[i]) - emit_one_texture(vc5, stage_tex, i); - } -} -#endif /* V3D_VERSION < 40 */ - -static uint32_t -translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt) -{ - if (vc5->swap_color_rb & (1 << rt)) { - colormask = ((colormask & (2 | 8)) | - ((colormask & 1) << 2) | - ((colormask & 4) >> 2)); - } - - return (~colormask) & 0xf; -} - -static void -emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job, - struct pipe_blend_state *blend, int rt) -{ - cl_emit(&job->bcl, BLEND_CONFIG, config) { - struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; - -#if V3D_VERSION >= 40 - config.render_target_mask = 1 << rt; -#else - assert(rt == 0); -#endif - - config.colour_blend_mode = rtblend->rgb_func; - config.colour_blend_dst_factor = - vc5_factor(rtblend->rgb_dst_factor, - vc5->blend_dst_alpha_one); - config.colour_blend_src_factor = - vc5_factor(rtblend->rgb_src_factor, - vc5->blend_dst_alpha_one); - - config.alpha_blend_mode = rtblend->alpha_func; - config.alpha_blend_dst_factor = - vc5_factor(rtblend->alpha_dst_factor, - vc5->blend_dst_alpha_one); - config.alpha_blend_src_factor = - vc5_factor(rtblend->alpha_src_factor, - vc5->blend_dst_alpha_one); - } -} - -void -v3dX(emit_state)(struct pipe_context *pctx) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_job *job = vc5->job; - bool rasterizer_discard = vc5->rasterizer->base.rasterizer_discard; - - if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | - VC5_DIRTY_RASTERIZER)) { - float *vpscale = vc5->viewport.scale; - float *vptranslate = vc5->viewport.translate; - float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; - float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; - float vp_miny = 
-fabsf(vpscale[1]) + vptranslate[1]; - float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; - - /* Clip to the scissor if it's enabled, but still clip to the - * drawable regardless since that controls where the binner - * tries to put things. - * - * Additionally, always clip the rendering to the viewport, - * since the hardware does guardband clipping, meaning - * primitives would rasterize outside of the view volume. - */ - uint32_t minx, miny, maxx, maxy; - if (!vc5->rasterizer->base.scissor) { - minx = MAX2(vp_minx, 0); - miny = MAX2(vp_miny, 0); - maxx = MIN2(vp_maxx, job->draw_width); - maxy = MIN2(vp_maxy, job->draw_height); - } else { - minx = MAX2(vp_minx, vc5->scissor.minx); - miny = MAX2(vp_miny, vc5->scissor.miny); - maxx = MIN2(vp_maxx, vc5->scissor.maxx); - maxy = MIN2(vp_maxy, vc5->scissor.maxy); - } - - cl_emit(&job->bcl, CLIP_WINDOW, clip) { - clip.clip_window_left_pixel_coordinate = minx; - clip.clip_window_bottom_pixel_coordinate = miny; - clip.clip_window_width_in_pixels = maxx - minx; - clip.clip_window_height_in_pixels = maxy - miny; - -#if V3D_VERSION < 41 - /* The HW won't entirely clip out when scissor w/h is - * 0. Just treat it the same as rasterizer discard. 
- */ - if (clip.clip_window_width_in_pixels == 0 || - clip.clip_window_height_in_pixels == 0) { - rasterizer_discard = true; - clip.clip_window_width_in_pixels = 1; - clip.clip_window_height_in_pixels = 1; - } -#endif - } - - job->draw_min_x = MIN2(job->draw_min_x, minx); - job->draw_min_y = MIN2(job->draw_min_y, miny); - job->draw_max_x = MAX2(job->draw_max_x, maxx); - job->draw_max_y = MAX2(job->draw_max_y, maxy); - } - - if (vc5->dirty & (VC5_DIRTY_RASTERIZER | - VC5_DIRTY_ZSA | - VC5_DIRTY_BLEND | - VC5_DIRTY_COMPILED_FS)) { - cl_emit(&job->bcl, CONFIGURATION_BITS, config) { - config.enable_forward_facing_primitive = - !rasterizer_discard && - !(vc5->rasterizer->base.cull_face & - PIPE_FACE_FRONT); - config.enable_reverse_facing_primitive = - !rasterizer_discard && - !(vc5->rasterizer->base.cull_face & - PIPE_FACE_BACK); - /* This seems backwards, but it's what gets the - * clipflat test to pass. - */ - config.clockwise_primitives = - vc5->rasterizer->base.front_ccw; - - config.enable_depth_offset = - vc5->rasterizer->base.offset_tri; - - config.rasterizer_oversample_mode = - vc5->rasterizer->base.multisample; - - config.direct3d_provoking_vertex = - vc5->rasterizer->base.flatshade_first; - - config.blend_enable = vc5->blend->rt[0].blend_enable; - - /* Note: EZ state may update based on the compiled FS, - * along with ZSA - */ - config.early_z_updates_enable = - (job->ez_state != VC5_EZ_DISABLED); - if (vc5->zsa->base.depth.enabled) { - config.z_updates_enable = - vc5->zsa->base.depth.writemask; - config.early_z_enable = - config.early_z_updates_enable; - config.depth_test_function = - vc5->zsa->base.depth.func; - } else { - config.depth_test_function = PIPE_FUNC_ALWAYS; - } - - config.stencil_enable = - vc5->zsa->base.stencil[0].enabled; - } - - } - - if (vc5->dirty & VC5_DIRTY_RASTERIZER && - vc5->rasterizer->base.offset_tri) { - cl_emit(&job->bcl, DEPTH_OFFSET, depth) { - depth.depth_offset_factor = - vc5->rasterizer->offset_factor; - 
depth.depth_offset_units = - vc5->rasterizer->offset_units; - } - } - - if (vc5->dirty & VC5_DIRTY_RASTERIZER) { - cl_emit(&job->bcl, POINT_SIZE, point_size) { - point_size.point_size = vc5->rasterizer->point_size; - } - - cl_emit(&job->bcl, LINE_WIDTH, line_width) { - line_width.line_width = vc5->rasterizer->base.line_width; - } - } - - if (vc5->dirty & VC5_DIRTY_VIEWPORT) { - cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { - clip.viewport_half_width_in_1_256th_of_pixel = - vc5->viewport.scale[0] * 256.0f; - clip.viewport_half_height_in_1_256th_of_pixel = - vc5->viewport.scale[1] * 256.0f; - } - - cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { - clip.viewport_z_offset_zc_to_zs = - vc5->viewport.translate[2]; - clip.viewport_z_scale_zc_to_zs = - vc5->viewport.scale[2]; - } - cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { - clip.minimum_zw = (vc5->viewport.translate[2] - - vc5->viewport.scale[2]); - clip.maximum_zw = (vc5->viewport.translate[2] + - vc5->viewport.scale[2]); - } - - cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { - vp.viewport_centre_x_coordinate = - vc5->viewport.translate[0]; - vp.viewport_centre_y_coordinate = - vc5->viewport.translate[1]; - } - } - - if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) { - struct pipe_blend_state *blend = vc5->blend; - - if (blend->independent_blend_enable) { - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) - emit_rt_blend(vc5, job, blend, i); - } else { - emit_rt_blend(vc5, job, blend, 0); - } - } - - if (vc5->dirty & VC5_DIRTY_BLEND) { - struct pipe_blend_state *blend = vc5->blend; - - cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { - if (blend->independent_blend_enable) { - mask.render_target_0_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[0].colormask, 0); - mask.render_target_1_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[1].colormask, 1); - mask.render_target_2_per_colour_component_write_masks = - translate_colormask(vc5, 
blend->rt[2].colormask, 2); - mask.render_target_3_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[3].colormask, 3); - } else { - mask.render_target_0_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[0].colormask, 0); - mask.render_target_1_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[0].colormask, 1); - mask.render_target_2_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[0].colormask, 2); - mask.render_target_3_per_colour_component_write_masks = - translate_colormask(vc5, blend->rt[0].colormask, 3); - } - } - } - - /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant - * color. - */ - if (vc5->dirty & VC5_DIRTY_BLEND_COLOR || - (V3D_VERSION < 41 && (vc5->dirty & VC5_DIRTY_BLEND))) { - cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { - colour.red_f16 = (vc5->swap_color_rb ? - vc5->blend_color.hf[2] : - vc5->blend_color.hf[0]); - colour.green_f16 = vc5->blend_color.hf[1]; - colour.blue_f16 = (vc5->swap_color_rb ? - vc5->blend_color.hf[0] : - vc5->blend_color.hf[2]); - colour.alpha_f16 = vc5->blend_color.hf[3]; - } - } - - if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { - struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; - struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; - - if (front->enabled) { - cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, - vc5->zsa->stencil_front, config) { - config.stencil_ref_value = - vc5->stencil_ref.ref_value[0]; - } - } - - if (back->enabled) { - cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, - vc5->zsa->stencil_back, config) { - config.stencil_ref_value = - vc5->stencil_ref.ref_value[1]; - } - } - } - -#if V3D_VERSION < 40 - /* Pre-4.x, we have texture state that depends on both the sampler and - * the view, so we merge them together at draw time. 
- */ - if (vc5->dirty & VC5_DIRTY_FRAGTEX) - emit_textures(vc5, &vc5->fragtex); - - if (vc5->dirty & VC5_DIRTY_VERTTEX) - emit_textures(vc5, &vc5->verttex); -#endif - - if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { - bool emitted_any = false; - - for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) { - if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i]) - continue; - - cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { - flags.varying_offset_v0 = i; - - if (emitted_any) { - flags.action_for_flat_shade_flags_of_lower_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_UNCHANGED; - flags.action_for_flat_shade_flags_of_higher_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_UNCHANGED; - } else { - flags.action_for_flat_shade_flags_of_lower_numbered_varyings = - ((i == 0) ? - V3D_VARYING_FLAGS_ACTION_UNCHANGED : - V3D_VARYING_FLAGS_ACTION_ZEROED); - - flags.action_for_flat_shade_flags_of_higher_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_ZEROED; - } - - flags.flat_shade_flags_for_varyings_v024 = - vc5->prog.fs->prog_data.fs->flat_shade_flags[i]; - } - - emitted_any = true; - } - - if (!emitted_any) { - cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); - } - } - -#if V3D_VERSION >= 40 - if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) { - bool emitted_any = false; - - for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) { - if (!vc5->prog.fs->prog_data.fs->centroid_flags[i]) - continue; - - cl_emit(&job->bcl, CENTROID_FLAGS, flags) { - flags.varying_offset_v0 = i; - - if (emitted_any) { - flags.action_for_centroid_flags_of_lower_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_UNCHANGED; - flags.action_for_centroid_flags_of_higher_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_UNCHANGED; - } else { - flags.action_for_centroid_flags_of_lower_numbered_varyings = - ((i == 0) ? 
- V3D_VARYING_FLAGS_ACTION_UNCHANGED : - V3D_VARYING_FLAGS_ACTION_ZEROED); - - flags.action_for_centroid_flags_of_higher_numbered_varyings = - V3D_VARYING_FLAGS_ACTION_ZEROED; - } - - flags.centroid_flags_for_varyings_v024 = - vc5->prog.fs->prog_data.fs->centroid_flags[i]; - } - - emitted_any = true; - } - - if (!emitted_any) { - cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); - } - } -#endif - - /* Set up the transform feedback data specs (which VPM entries to - * output to which buffers). - */ - if (vc5->dirty & (VC5_DIRTY_STREAMOUT | - VC5_DIRTY_RASTERIZER | - VC5_DIRTY_PRIM_MODE)) { - struct vc5_streamout_stateobj *so = &vc5->streamout; - - if (so->num_targets) { - bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS && - vc5->rasterizer->base.point_size_per_vertex); - uint16_t *tf_specs = (psiz_per_vertex ? - vc5->prog.bind_vs->tf_specs_psiz : - vc5->prog.bind_vs->tf_specs); - -#if V3D_VERSION >= 40 - job->tf_enabled = (vc5->prog.bind_vs->num_tf_specs != 0 && - vc5->active_queries); - - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { - tfe.number_of_16_bit_output_data_specs_following = - vc5->prog.bind_vs->num_tf_specs; - tfe.enable = job->tf_enabled; - }; -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { - tfe.number_of_32_bit_output_buffer_address_following = - so->num_targets; - tfe.number_of_16_bit_output_data_specs_following = - vc5->prog.bind_vs->num_tf_specs; - }; -#endif /* V3D_VERSION < 40 */ - for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { - cl_emit_prepacked(&job->bcl, &tf_specs[i]); - } - } else if (job->tf_enabled) { -#if V3D_VERSION >= 40 - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { - tfe.enable = false; - }; - job->tf_enabled = false; -#endif /* V3D_VERSION >= 40 */ - } - } - - /* Set up the trasnform feedback buffers. 
*/ - if (vc5->dirty & VC5_DIRTY_STREAMOUT) { - struct vc5_streamout_stateobj *so = &vc5->streamout; - for (int i = 0; i < so->num_targets; i++) { - const struct pipe_stream_output_target *target = - so->targets[i]; - struct vc5_resource *rsc = target ? - vc5_resource(target->buffer) : NULL; - -#if V3D_VERSION >= 40 - if (!target) - continue; - - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) { - output.buffer_address = - cl_address(rsc->bo, - target->buffer_offset); - output.buffer_size_in_32_bit_words = - target->buffer_size >> 2; - output.buffer_number = i; - } -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { - if (target) { - output.address = - cl_address(rsc->bo, - target->buffer_offset); - } - }; -#endif /* V3D_VERSION < 40 */ - if (target) { - vc5_job_add_write_resource(vc5->job, - target->buffer); - } - /* XXX: buffer_size? */ - } - } - - if (vc5->dirty & VC5_DIRTY_OQ) { - cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { - job->oq_enabled = vc5->active_queries && vc5->current_oq; - if (job->oq_enabled) { - counter.address = cl_address(vc5->current_oq, 0); - } - } - } -} diff --git a/src/gallium/drivers/vc5/vc5_fence.c b/src/gallium/drivers/vc5/vc5_fence.c deleted file mode 100644 index 731dd6db908..00000000000 --- a/src/gallium/drivers/vc5/vc5_fence.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or 
substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file vc5_fence.c - * - * Seqno-based fence management. - * - * We have two mechanisms for waiting in our kernel API: You can wait on a BO - * to have all rendering to from any process to be completed, or wait on a - * seqno for that particular seqno to be passed. The fence API we're - * implementing is based on waiting for all rendering in the context to have - * completed (with no reference to what other processes might be doing with - * the same BOs), so we can just use the seqno of the last rendering we'd - * fired off as our fence marker. 
- */ - -#include "util/u_inlines.h" - -#include "vc5_context.h" -#include "vc5_bufmgr.h" - -struct vc5_fence { - struct pipe_reference reference; - uint32_t sync; -}; - -static void -vc5_fence_reference(struct pipe_screen *pscreen, - struct pipe_fence_handle **pp, - struct pipe_fence_handle *pf) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_fence **p = (struct vc5_fence **)pp; - struct vc5_fence *f = (struct vc5_fence *)pf; - struct vc5_fence *old = *p; - - if (pipe_reference(&(*p)->reference, &f->reference)) { - drmSyncobjDestroy(screen->fd, old->sync); - free(old); - } - *p = f; -} - -static boolean -vc5_fence_finish(struct pipe_screen *pscreen, - struct pipe_context *ctx, - struct pipe_fence_handle *pf, - uint64_t timeout_ns) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_fence *f = (struct vc5_fence *)pf; - - return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL); -} - -struct vc5_fence * -vc5_fence_create(struct vc5_context *vc5) -{ - struct vc5_fence *f = calloc(1, sizeof(*f)); - if (!f) - return NULL; - - uint32_t new_sync; - /* Make a new sync object for the context. 
*/ - int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED, - &new_sync); - if (ret) { - free(f); - return NULL; - } - - pipe_reference_init(&f->reference, 1); - f->sync = vc5->out_sync; - vc5->out_sync = new_sync; - - return f; -} - -void -vc5_fence_init(struct vc5_screen *screen) -{ - screen->base.fence_reference = vc5_fence_reference; - screen->base.fence_finish = vc5_fence_finish; -} diff --git a/src/gallium/drivers/vc5/vc5_format_table.h b/src/gallium/drivers/vc5/vc5_format_table.h deleted file mode 100644 index 8b8011351a1..00000000000 --- a/src/gallium/drivers/vc5/vc5_format_table.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright © 2014-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#define V3D_OUTPUT_IMAGE_FORMAT_NO 255 - -#include -#include - -struct vc5_format { - /** Set if the pipe format is defined in the table. 
*/ - bool present; - - /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ - uint8_t rt_type; - - /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ - uint8_t tex_type; - - /** - * Swizzle to apply to the RGBA shader output for storing to the tile - * buffer, to the RGBA tile buffer to produce shader input (for - * blending), and for turning the rgba8888 texture sampler return - * value into shader rgba values. - */ - uint8_t swizzle[4]; - - /* Whether the return value is 16F/I/UI or 32F/I/UI. */ - uint8_t return_size; - - /* If return_size == 32, how many channels are returned by texturing. - * 16 always returns 2 pairs of 16 bit values. - */ - uint8_t return_channels; -}; diff --git a/src/gallium/drivers/vc5/vc5_formats.c b/src/gallium/drivers/vc5/vc5_formats.c deleted file mode 100644 index b65b7cdbe71..00000000000 --- a/src/gallium/drivers/vc5/vc5_formats.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file vc5_formats.c - * - * Contains the table and accessors for VC5 texture and render target format - * support. - * - * The hardware has limited support for texture formats, and extremely limited - * support for render target formats. As a result, we emulate other formats - * in our shader code, and this stores the table for doing so. - */ - -#include "util/macros.h" - -#include "vc5_context.h" -#include "vc5_format_table.h" - -static const struct vc5_format * -get_format(const struct v3d_device_info *devinfo, enum pipe_format f) -{ - if (devinfo->ver >= 41) - return v3d41_get_format_desc(f); - else - return v3d33_get_format_desc(f); -} - -bool -vc5_rt_format_supported(const struct v3d_device_info *devinfo, - enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - if (!vf) - return false; - - return vf->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO; -} - -uint8_t -vc5_get_rt_format(const struct v3d_device_info *devinfo, enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - if (!vf) - return 0; - - return vf->rt_type; -} - -bool -vc5_tex_format_supported(const struct v3d_device_info *devinfo, - enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - return vf != NULL; -} - -uint8_t -vc5_get_tex_format(const struct v3d_device_info *devinfo, enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - if (!vf) - return 0; - - return vf->tex_type; -} - -uint8_t -vc5_get_tex_return_size(const struct v3d_device_info *devinfo, - enum pipe_format f, enum pipe_tex_compare compare) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - if (!vf) - return 0; - - if (compare == 
PIPE_TEX_COMPARE_R_TO_TEXTURE) - return 16; - - return vf->return_size; -} - -uint8_t -vc5_get_tex_return_channels(const struct v3d_device_info *devinfo, - enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - - if (!vf) - return 0; - - return vf->return_channels; -} - -const uint8_t * -vc5_get_format_swizzle(const struct v3d_device_info *devinfo, enum pipe_format f) -{ - const struct vc5_format *vf = get_format(devinfo, f); - static const uint8_t fallback[] = {0, 1, 2, 3}; - - if (!vf) - return fallback; - - return vf->swizzle; -} - -void -vc5_get_internal_type_bpp_for_output_format(const struct v3d_device_info *devinfo, - uint32_t format, - uint32_t *type, - uint32_t *bpp) -{ - if (devinfo->ver >= 41) { - return v3d41_get_internal_type_bpp_for_output_format(format, - type, bpp); - } else { - return v3d33_get_internal_type_bpp_for_output_format(format, - type, bpp); - } -} diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c deleted file mode 100644 index 7973b9e6829..00000000000 --- a/src/gallium/drivers/vc5/vc5_job.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file vc5_job.c - * - * Functions for submitting VC5 render jobs to the kernel. - */ - -#include -#include "vc5_context.h" -/* The OQ/semaphore packets are the same across V3D versions. */ -#define V3D_VERSION 33 -#include "broadcom/cle/v3dx_pack.h" -#include "broadcom/common/v3d_macros.h" -#include "util/hash_table.h" -#include "util/ralloc.h" -#include "util/set.h" -#include "broadcom/clif/clif_dump.h" - -static void -remove_from_ht(struct hash_table *ht, void *key) -{ - struct hash_entry *entry = _mesa_hash_table_search(ht, key); - _mesa_hash_table_remove(ht, entry); -} - -static void -vc5_job_free(struct vc5_context *vc5, struct vc5_job *job) -{ - struct set_entry *entry; - - set_foreach(job->bos, entry) { - struct vc5_bo *bo = (struct vc5_bo *)entry->key; - vc5_bo_unreference(&bo); - } - - remove_from_ht(vc5->jobs, &job->key); - - if (job->write_prscs) { - struct set_entry *entry; - - set_foreach(job->write_prscs, entry) { - const struct pipe_resource *prsc = entry->key; - - remove_from_ht(vc5->write_jobs, (void *)prsc); - } - } - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - if (job->cbufs[i]) { - remove_from_ht(vc5->write_jobs, job->cbufs[i]->texture); - pipe_surface_reference(&job->cbufs[i], NULL); - } - } - if (job->zsbuf) { - remove_from_ht(vc5->write_jobs, job->zsbuf->texture); - pipe_surface_reference(&job->zsbuf, NULL); - } - - if (vc5->job == job) - vc5->job = NULL; - - vc5_destroy_cl(&job->bcl); - vc5_destroy_cl(&job->rcl); - 
vc5_destroy_cl(&job->indirect); - vc5_bo_unreference(&job->tile_alloc); - vc5_bo_unreference(&job->tile_state); - - ralloc_free(job); -} - -static struct vc5_job * -vc5_job_create(struct vc5_context *vc5) -{ - struct vc5_job *job = rzalloc(vc5, struct vc5_job); - - job->vc5 = vc5; - - vc5_init_cl(job, &job->bcl); - vc5_init_cl(job, &job->rcl); - vc5_init_cl(job, &job->indirect); - - job->draw_min_x = ~0; - job->draw_min_y = ~0; - job->draw_max_x = 0; - job->draw_max_y = 0; - - job->bos = _mesa_set_create(job, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - return job; -} - -void -vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo) -{ - if (!bo) - return; - - if (_mesa_set_search(job->bos, bo)) - return; - - vc5_bo_reference(bo); - _mesa_set_add(job->bos, bo); - job->referenced_size += bo->size; - - uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles; - - if (job->submit.bo_handle_count >= job->bo_handles_size) { - job->bo_handles_size = MAX2(4, job->bo_handles_size * 2); - bo_handles = reralloc(job, bo_handles, - uint32_t, job->bo_handles_size); - job->submit.bo_handles = (uintptr_t)(void *)bo_handles; - } - bo_handles[job->submit.bo_handle_count++] = bo->handle; -} - -void -vc5_job_add_write_resource(struct vc5_job *job, struct pipe_resource *prsc) -{ - struct vc5_context *vc5 = job->vc5; - - if (!job->write_prscs) { - job->write_prscs = _mesa_set_create(job, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - } - - _mesa_set_add(job->write_prscs, prsc); - _mesa_hash_table_insert(vc5->write_jobs, prsc, job); -} - -void -vc5_flush_jobs_writing_resource(struct vc5_context *vc5, - struct pipe_resource *prsc) -{ - struct hash_entry *entry = _mesa_hash_table_search(vc5->write_jobs, - prsc); - if (entry) { - struct vc5_job *job = entry->data; - vc5_job_submit(vc5, job); - } -} - -void -vc5_flush_jobs_reading_resource(struct vc5_context *vc5, - struct pipe_resource *prsc) -{ - struct vc5_resource *rsc = vc5_resource(prsc); - - 
vc5_flush_jobs_writing_resource(vc5, prsc); - - struct hash_entry *entry; - hash_table_foreach(vc5->jobs, entry) { - struct vc5_job *job = entry->data; - - if (_mesa_set_search(job->bos, rsc->bo)) { - vc5_job_submit(vc5, job); - /* Reminder: vc5->jobs is safe to keep iterating even - * after deletion of an entry. - */ - continue; - } - } -} - -static void -vc5_job_set_tile_buffer_size(struct vc5_job *job) -{ - static const uint8_t tile_sizes[] = { - 64, 64, - 64, 32, - 32, 32, - 32, 16, - 16, 16, - }; - int tile_size_index = 0; - if (job->msaa) - tile_size_index += 2; - - if (job->cbufs[3] || job->cbufs[2]) - tile_size_index += 2; - else if (job->cbufs[1]) - tile_size_index++; - - int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - if (job->cbufs[i]) { - struct vc5_surface *surf = vc5_surface(job->cbufs[i]); - max_bpp = MAX2(max_bpp, surf->internal_bpp); - } - } - job->internal_bpp = max_bpp; - STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); - tile_size_index += max_bpp; - - assert(tile_size_index < ARRAY_SIZE(tile_sizes)); - job->tile_width = tile_sizes[tile_size_index * 2 + 0]; - job->tile_height = tile_sizes[tile_size_index * 2 + 1]; -} - -/** - * Returns a vc5_job struture for tracking V3D rendering to a particular FBO. - * - * If we've already started rendering to this FBO, then return old same job, - * otherwise make a new one. If we're beginning rendering to an FBO, make - * sure that any previous reads of the FBO (or writes to its color/Z surfaces) - * have been flushed. - */ -struct vc5_job * -vc5_get_job(struct vc5_context *vc5, - struct pipe_surface **cbufs, struct pipe_surface *zsbuf) -{ - /* Return the existing job for this FBO if we have one */ - struct vc5_job_key local_key = { - .cbufs = { - cbufs[0], - cbufs[1], - cbufs[2], - cbufs[3], - }, - .zsbuf = zsbuf, - }; - struct hash_entry *entry = _mesa_hash_table_search(vc5->jobs, - &local_key); - if (entry) - return entry->data; - - /* Creating a new job. 
Make sure that any previous jobs reading or - * writing these buffers are flushed. - */ - struct vc5_job *job = vc5_job_create(vc5); - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - if (cbufs[i]) { - vc5_flush_jobs_reading_resource(vc5, cbufs[i]->texture); - pipe_surface_reference(&job->cbufs[i], cbufs[i]); - - if (cbufs[i]->texture->nr_samples > 1) - job->msaa = true; - } - } - if (zsbuf) { - vc5_flush_jobs_reading_resource(vc5, zsbuf->texture); - pipe_surface_reference(&job->zsbuf, zsbuf); - if (zsbuf->texture->nr_samples > 1) - job->msaa = true; - } - - vc5_job_set_tile_buffer_size(job); - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - if (cbufs[i]) - _mesa_hash_table_insert(vc5->write_jobs, - cbufs[i]->texture, job); - } - if (zsbuf) - _mesa_hash_table_insert(vc5->write_jobs, zsbuf->texture, job); - - memcpy(&job->key, &local_key, sizeof(local_key)); - _mesa_hash_table_insert(vc5->jobs, &job->key, job); - - return job; -} - -struct vc5_job * -vc5_get_job_for_fbo(struct vc5_context *vc5) -{ - if (vc5->job) - return vc5->job; - - struct pipe_surface **cbufs = vc5->framebuffer.cbufs; - struct pipe_surface *zsbuf = vc5->framebuffer.zsbuf; - struct vc5_job *job = vc5_get_job(vc5, cbufs, zsbuf); - - /* The dirty flags are tracking what's been updated while vc5->job has - * been bound, so set them all to ~0 when switching between jobs. We - * also need to reset all state at the start of rendering. - */ - vc5->dirty = ~0; - - /* If we're binding to uninitialized buffers, no need to load their - * contents before drawing. 
- */ - for (int i = 0; i < 4; i++) { - if (cbufs[i]) { - struct vc5_resource *rsc = vc5_resource(cbufs[i]->texture); - if (!rsc->writes) - job->cleared |= PIPE_CLEAR_COLOR0 << i; - } - } - - if (zsbuf) { - struct vc5_resource *rsc = vc5_resource(zsbuf->texture); - if (!rsc->writes) - job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; - } - - job->draw_tiles_x = DIV_ROUND_UP(vc5->framebuffer.width, - job->tile_width); - job->draw_tiles_y = DIV_ROUND_UP(vc5->framebuffer.height, - job->tile_height); - - vc5->job = job; - - return job; -} - -static bool -vc5_clif_dump_lookup(void *data, uint32_t addr, void **vaddr) -{ - struct vc5_job *job = data; - struct set_entry *entry; - - set_foreach(job->bos, entry) { - struct vc5_bo *bo = (void *)entry->key; - - if (addr >= bo->offset && - addr < bo->offset + bo->size) { - vc5_bo_map(bo); - *vaddr = bo->map + addr - bo->offset; - return true; - } - } - - return false; -} - -static void -vc5_clif_dump(struct vc5_context *vc5, struct vc5_job *job) -{ - if (!(V3D_DEBUG & V3D_DEBUG_CL)) - return; - - struct clif_dump *clif = clif_dump_init(&vc5->screen->devinfo, - stderr, vc5_clif_dump_lookup, - job); - - fprintf(stderr, "BCL: 0x%08x..0x%08x\n", - job->submit.bcl_start, job->submit.bcl_end); - - clif_dump_add_cl(clif, job->submit.bcl_start, job->submit.bcl_end); - - fprintf(stderr, "RCL: 0x%08x..0x%08x\n", - job->submit.rcl_start, job->submit.rcl_end); - clif_dump_add_cl(clif, job->submit.rcl_start, job->submit.rcl_end); -} - -/** - * Submits the job to the kernel and then reinitializes it. 
- */ -void -vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) -{ - MAYBE_UNUSED struct vc5_screen *screen = vc5->screen; - - if (!job->needs_flush) - goto done; - - if (vc5->screen->devinfo.ver >= 41) - v3d41_emit_rcl(job); - else - v3d33_emit_rcl(job); - - if (cl_offset(&job->bcl) > 0) { - if (screen->devinfo.ver >= 41) - v3d41_bcl_epilogue(vc5, job); - else - v3d33_bcl_epilogue(vc5, job); - } - - job->submit.out_sync = vc5->out_sync; - job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); - job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); - - /* On V3D 4.1, the tile alloc/state setup moved to register writes - * instead of binner packets. - */ - if (screen->devinfo.ver >= 41) { - vc5_job_add_bo(job, job->tile_alloc); - job->submit.qma = job->tile_alloc->offset; - job->submit.qms = job->tile_alloc->size; - - vc5_job_add_bo(job, job->tile_state); - job->submit.qts = job->tile_state->offset; - } - - vc5_clif_dump(vc5, job); - - if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { - int ret; - -#ifndef USE_V3D_SIMULATOR - ret = drmIoctl(vc5->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); -#else - ret = vc5_simulator_flush(vc5, &job->submit, job); -#endif - static bool warned = false; - if (ret && !warned) { - fprintf(stderr, "Draw call returned %s. 
" - "Expect corruption.\n", strerror(errno)); - warned = true; - } - } - -done: - vc5_job_free(vc5, job); -} - -static bool -vc5_job_compare(const void *a, const void *b) -{ - return memcmp(a, b, sizeof(struct vc5_job_key)) == 0; -} - -static uint32_t -vc5_job_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct vc5_job_key)); -} - -void -vc5_job_init(struct vc5_context *vc5) -{ - vc5->jobs = _mesa_hash_table_create(vc5, - vc5_job_hash, - vc5_job_compare); - vc5->write_jobs = _mesa_hash_table_create(vc5, - _mesa_hash_pointer, - _mesa_key_pointer_equal); -} - diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c deleted file mode 100644 index 23d2d73a2dc..00000000000 --- a/src/gallium/drivers/vc5/vc5_program.c +++ /dev/null @@ -1,682 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/ralloc.h" -#include "util/hash_table.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" -#include "compiler/nir/nir.h" -#include "compiler/nir/nir_builder.h" -#include "nir/tgsi_to_nir.h" -#include "compiler/v3d_compiler.h" -#include "vc5_context.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" -#include "mesa/state_tracker/st_glsl_types.h" - -static gl_varying_slot -vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) -{ - nir_foreach_variable(var, &s->outputs) { - if (var->data.driver_location == driver_location) { - return var->data.location; - } - } - - return -1; -} - -/** - * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. - * - * A shader can have 16 of these specs, and each one of them can write up to - * 16 dwords. Since we allow a total of 64 transform feedback output - * components (not 16 vectors), we have to group the writes of multiple - * varyings together in a single data spec. - */ -static void -vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, - const struct pipe_stream_output_info *stream_output) -{ - if (!stream_output->num_outputs) - return; - - struct v3d_varying_slot slots[PIPE_MAX_SO_OUTPUTS * 4]; - int slot_count = 0; - - for (int buffer = 0; buffer < PIPE_MAX_SO_BUFFERS; buffer++) { - uint32_t buffer_offset = 0; - uint32_t vpm_start = slot_count; - - for (int i = 0; i < stream_output->num_outputs; i++) { - const struct pipe_stream_output *output = - &stream_output->output[i]; - - if (output->output_buffer != buffer) - continue; - - /* We assume that the SO outputs appear in increasing - * order in the buffer. 
- */ - assert(output->dst_offset >= buffer_offset); - - /* Pad any undefined slots in the output */ - for (int j = buffer_offset; j < output->dst_offset; j++) { - slots[slot_count] = - v3d_slot_from_slot_and_component(VARYING_SLOT_POS, 0); - slot_count++; - buffer_offset++; - } - - /* Set the coordinate shader up to output the - * components of this varying. - */ - for (int j = 0; j < output->num_components; j++) { - gl_varying_slot slot = - vc5_get_slot_for_driver_location(so->base.ir.nir, output->register_index); - - slots[slot_count] = - v3d_slot_from_slot_and_component(slot, - output->start_component + j); - slot_count++; - buffer_offset++; - } - } - - uint32_t vpm_size = slot_count - vpm_start; - if (!vpm_size) - continue; - - uint32_t vpm_start_offset = vpm_start + 6; - - while (vpm_size) { - uint32_t write_size = MIN2(vpm_size, 1 << 4); - - struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { - /* We need the offset from the coordinate shader's VPM - * output block, which has the [X, Y, Z, W, Xs, Ys] - * values at the start. - */ - .first_shaded_vertex_value_to_output = vpm_start_offset, - .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1, - .output_buffer_to_write_to = buffer, - }; - - /* GFXH-1559 */ - assert(unpacked.first_shaded_vertex_value_to_output != 8 || - so->num_tf_specs != 0); - - assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, - (void *)&so->tf_specs[so->num_tf_specs], - &unpacked); - - /* If point size is being written by the shader, then - * all the VPM start offsets are shifted up by one. - * We won't know that until the variant is compiled, - * though. 
- */ - unpacked.first_shaded_vertex_value_to_output++; - - /* GFXH-1559 */ - assert(unpacked.first_shaded_vertex_value_to_output != 8 || - so->num_tf_specs != 0); - - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, - (void *)&so->tf_specs_psiz[so->num_tf_specs], - &unpacked); - so->num_tf_specs++; - vpm_start_offset += write_size; - vpm_size -= write_size; - } - } - - so->num_tf_outputs = slot_count; - so->tf_outputs = ralloc_array(so->base.ir.nir, struct v3d_varying_slot, - slot_count); - memcpy(so->tf_outputs, slots, sizeof(*slots) * slot_count); -} - -static int -type_size(const struct glsl_type *type) -{ - return glsl_count_attribute_slots(type, false); -} - -static int -uniforms_type_size(const struct glsl_type *type) -{ - return st_glsl_storage_type_size(type, false); -} - -static void * -vc5_shader_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_uncompiled_shader *so = CALLOC_STRUCT(vc5_uncompiled_shader); - if (!so) - return NULL; - - so->program_id = vc5->next_uncompiled_program_id++; - - nir_shader *s; - - if (cso->type == PIPE_SHADER_IR_NIR) { - /* The backend takes ownership of the NIR shader on state - * creation. 
- */ - s = cso->ir.nir; - - NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, - type_size, - (nir_lower_io_options)0); - NIR_PASS_V(s, nir_lower_io, nir_var_uniform, - uniforms_type_size, - (nir_lower_io_options)0); - } else { - assert(cso->type == PIPE_SHADER_IR_TGSI); - - if (V3D_DEBUG & V3D_DEBUG_TGSI) { - fprintf(stderr, "prog %d TGSI:\n", - so->program_id); - tgsi_dump(cso->tokens, 0); - fprintf(stderr, "\n"); - } - s = tgsi_to_nir(cso->tokens, &v3d_nir_options); - - so->was_tgsi = true; - } - - NIR_PASS_V(s, nir_opt_global_to_local); - NIR_PASS_V(s, nir_lower_regs_to_ssa); - NIR_PASS_V(s, nir_normalize_cubemap_coords); - - NIR_PASS_V(s, nir_lower_load_const_to_scalar); - - v3d_optimize_nir(s); - - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); - - /* Garbage collect dead instructions */ - nir_sweep(s); - - so->base.type = PIPE_SHADER_IR_NIR; - so->base.ir.nir = s; - - vc5_set_transform_feedback_outputs(so, &cso->stream_output); - - if (V3D_DEBUG & (V3D_DEBUG_NIR | - v3d_debug_flag_for_shader_stage(s->info.stage))) { - fprintf(stderr, "%s prog %d NIR:\n", - gl_shader_stage_name(s->info.stage), - so->program_id); - nir_print_shader(s, stderr); - fprintf(stderr, "\n"); - } - - return so; -} - -static struct vc5_compiled_shader * -vc5_get_compiled_shader(struct vc5_context *vc5, struct v3d_key *key) -{ - struct vc5_uncompiled_shader *shader_state = key->shader_state; - nir_shader *s = shader_state->base.ir.nir; - - struct hash_table *ht; - uint32_t key_size; - if (s->info.stage == MESA_SHADER_FRAGMENT) { - ht = vc5->fs_cache; - key_size = sizeof(struct v3d_fs_key); - } else { - ht = vc5->vs_cache; - key_size = sizeof(struct v3d_vs_key); - } - - struct hash_entry *entry = _mesa_hash_table_search(ht, key); - if (entry) - return entry->data; - - struct vc5_compiled_shader *shader = - rzalloc(NULL, struct vc5_compiled_shader); - - int program_id = shader_state->program_id; - int variant_id = - 
p_atomic_inc_return(&shader_state->compiled_variant_count); - uint64_t *qpu_insts; - uint32_t shader_size; - - switch (s->info.stage) { - case MESA_SHADER_VERTEX: - shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); - - qpu_insts = v3d_compile_vs(vc5->screen->compiler, - (struct v3d_vs_key *)key, - shader->prog_data.vs, s, - program_id, variant_id, - &shader_size); - break; - case MESA_SHADER_FRAGMENT: - shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); - - qpu_insts = v3d_compile_fs(vc5->screen->compiler, - (struct v3d_fs_key *)key, - shader->prog_data.fs, s, - program_id, variant_id, - &shader_size); - break; - default: - unreachable("bad stage"); - } - - vc5_set_shader_uniform_dirty_flags(shader); - - shader->bo = vc5_bo_alloc(vc5->screen, shader_size, "shader"); - vc5_bo_map(shader->bo); - memcpy(shader->bo->map, qpu_insts, shader_size); - - free(qpu_insts); - - struct vc5_key *dup_key; - dup_key = ralloc_size(shader, key_size); - memcpy(dup_key, key, key_size); - _mesa_hash_table_insert(ht, dup_key, shader); - - if (shader->prog_data.base->spill_size > - vc5->prog.spill_size_per_thread) { - /* Max 4 QPUs per slice, 3 slices per core. We only do single - * core so far. This overallocates memory on smaller cores. 
- */ - int total_spill_size = - 4 * 3 * shader->prog_data.base->spill_size; - - vc5_bo_unreference(&vc5->prog.spill_bo); - vc5->prog.spill_bo = vc5_bo_alloc(vc5->screen, - total_spill_size, "spill"); - vc5->prog.spill_size_per_thread = - shader->prog_data.base->spill_size; - } - - return shader; -} - -static void -vc5_setup_shared_key(struct vc5_context *vc5, struct v3d_key *key, - struct vc5_texture_stateobj *texstate) -{ - const struct v3d_device_info *devinfo = &vc5->screen->devinfo; - - for (int i = 0; i < texstate->num_textures; i++) { - struct pipe_sampler_view *sampler = texstate->textures[i]; - struct vc5_sampler_view *vc5_sampler = vc5_sampler_view(sampler); - struct pipe_sampler_state *sampler_state = - texstate->samplers[i]; - - if (!sampler) - continue; - - key->tex[i].return_size = - vc5_get_tex_return_size(devinfo, - sampler->format, - sampler_state->compare_mode); - - /* For 16-bit, we set up the sampler to always return 2 - * channels (meaning no recompiles for most statechanges), - * while for 32 we actually scale the returns with channels. - */ - if (key->tex[i].return_size == 16) { - key->tex[i].return_channels = 2; - } else if (devinfo->ver > 40) { - key->tex[i].return_channels = 4; - } else { - key->tex[i].return_channels = - vc5_get_tex_return_channels(devinfo, - sampler->format); - } - - if (key->tex[i].return_size == 32 && devinfo->ver < 40) { - memcpy(key->tex[i].swizzle, - vc5_sampler->swizzle, - sizeof(vc5_sampler->swizzle)); - } else { - /* For 16-bit returns, we let the sampler state handle - * the swizzle. 
- */ - key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; - key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; - key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; - key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; - } - - if (sampler) { - key->tex[i].compare_mode = sampler_state->compare_mode; - key->tex[i].compare_func = sampler_state->compare_func; - key->tex[i].clamp_s = - sampler_state->wrap_s == PIPE_TEX_WRAP_CLAMP; - key->tex[i].clamp_t = - sampler_state->wrap_t == PIPE_TEX_WRAP_CLAMP; - key->tex[i].clamp_r = - sampler_state->wrap_r == PIPE_TEX_WRAP_CLAMP; - } - } - - key->ucp_enables = vc5->rasterizer->base.clip_plane_enable; -} - -static void -vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode) -{ - struct vc5_job *job = vc5->job; - struct v3d_fs_key local_key; - struct v3d_fs_key *key = &local_key; - - if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | - VC5_DIRTY_BLEND | - VC5_DIRTY_FRAMEBUFFER | - VC5_DIRTY_ZSA | - VC5_DIRTY_RASTERIZER | - VC5_DIRTY_SAMPLE_MASK | - VC5_DIRTY_FRAGTEX | - VC5_DIRTY_UNCOMPILED_FS))) { - return; - } - - memset(key, 0, sizeof(*key)); - vc5_setup_shared_key(vc5, &key->base, &vc5->fragtex); - key->base.shader_state = vc5->prog.bind_fs; - key->is_points = (prim_mode == PIPE_PRIM_POINTS); - key->is_lines = (prim_mode >= PIPE_PRIM_LINES && - prim_mode <= PIPE_PRIM_LINE_STRIP); - key->clamp_color = vc5->rasterizer->base.clamp_fragment_color; - if (vc5->blend->logicop_enable) { - key->logicop_func = vc5->blend->logicop_func; - } else { - key->logicop_func = PIPE_LOGICOP_COPY; - } - if (job->msaa) { - key->msaa = vc5->rasterizer->base.multisample; - key->sample_coverage = (vc5->rasterizer->base.multisample && - vc5->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); - key->sample_alpha_to_coverage = vc5->blend->alpha_to_coverage; - key->sample_alpha_to_one = vc5->blend->alpha_to_one; - } - - key->depth_enabled = (vc5->zsa->base.depth.enabled || - vc5->zsa->base.stencil[0].enabled); - if (vc5->zsa->base.alpha.enabled) { - key->alpha_test = true; - key->alpha_test_func = 
vc5->zsa->base.alpha.func; - } - - /* gl_FragColor's propagation to however many bound color buffers - * there are means that the buffer count needs to be in the key. - */ - key->nr_cbufs = vc5->framebuffer.nr_cbufs; - key->swap_color_rb = vc5->swap_color_rb; - - for (int i = 0; i < key->nr_cbufs; i++) { - struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; - if (!cbuf) - continue; - - const struct util_format_description *desc = - util_format_description(cbuf->format); - - if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && - desc->channel[0].size == 32) { - key->f32_color_rb |= 1 << i; - } - - if (vc5->prog.bind_fs->was_tgsi) { - if (util_format_is_pure_uint(cbuf->format)) - key->uint_color_rb |= 1 << i; - else if (util_format_is_pure_sint(cbuf->format)) - key->int_color_rb |= 1 << i; - } - } - - if (key->is_points) { - key->point_sprite_mask = - vc5->rasterizer->base.sprite_coord_enable; - key->point_coord_upper_left = - (vc5->rasterizer->base.sprite_coord_mode == - PIPE_SPRITE_COORD_UPPER_LEFT); - } - - key->light_twoside = vc5->rasterizer->base.light_twoside; - key->shade_model_flat = vc5->rasterizer->base.flatshade; - - struct vc5_compiled_shader *old_fs = vc5->prog.fs; - vc5->prog.fs = vc5_get_compiled_shader(vc5, &key->base); - if (vc5->prog.fs == old_fs) - return; - - vc5->dirty |= VC5_DIRTY_COMPILED_FS; - - if (old_fs) { - if (vc5->prog.fs->prog_data.fs->flat_shade_flags != - old_fs->prog_data.fs->flat_shade_flags) { - vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS; - } - - if (vc5->prog.fs->prog_data.fs->centroid_flags != - old_fs->prog_data.fs->centroid_flags) { - vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS; - } - } - - if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots, - old_fs->prog_data.fs->input_slots, - sizeof(vc5->prog.fs->prog_data.fs->input_slots))) { - vc5->dirty |= VC5_DIRTY_FS_INPUTS; - } -} - -static void -vc5_update_compiled_vs(struct vc5_context *vc5, uint8_t prim_mode) -{ - struct v3d_vs_key local_key; - struct v3d_vs_key *key = 
&local_key; - - if (!(vc5->dirty & (VC5_DIRTY_PRIM_MODE | - VC5_DIRTY_RASTERIZER | - VC5_DIRTY_VERTTEX | - VC5_DIRTY_VTXSTATE | - VC5_DIRTY_UNCOMPILED_VS | - VC5_DIRTY_FS_INPUTS))) { - return; - } - - memset(key, 0, sizeof(*key)); - vc5_setup_shared_key(vc5, &key->base, &vc5->verttex); - key->base.shader_state = vc5->prog.bind_vs; - key->num_fs_inputs = vc5->prog.fs->prog_data.fs->base.num_inputs; - STATIC_ASSERT(sizeof(key->fs_inputs) == - sizeof(vc5->prog.fs->prog_data.fs->input_slots)); - memcpy(key->fs_inputs, vc5->prog.fs->prog_data.fs->input_slots, - sizeof(key->fs_inputs)); - key->clamp_color = vc5->rasterizer->base.clamp_vertex_color; - - key->per_vertex_point_size = - (prim_mode == PIPE_PRIM_POINTS && - vc5->rasterizer->base.point_size_per_vertex); - - struct vc5_compiled_shader *vs = - vc5_get_compiled_shader(vc5, &key->base); - if (vs != vc5->prog.vs) { - vc5->prog.vs = vs; - vc5->dirty |= VC5_DIRTY_COMPILED_VS; - } - - key->is_coord = true; - /* Coord shaders only output varyings used by transform feedback. 
*/ - struct vc5_uncompiled_shader *shader_state = key->base.shader_state; - memcpy(key->fs_inputs, shader_state->tf_outputs, - sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); - if (shader_state->num_tf_outputs < key->num_fs_inputs) { - memset(&key->fs_inputs[shader_state->num_tf_outputs], - 0, - sizeof(*key->fs_inputs) * (key->num_fs_inputs - - shader_state->num_tf_outputs)); - } - key->num_fs_inputs = shader_state->num_tf_outputs; - - struct vc5_compiled_shader *cs = - vc5_get_compiled_shader(vc5, &key->base); - if (cs != vc5->prog.cs) { - vc5->prog.cs = cs; - vc5->dirty |= VC5_DIRTY_COMPILED_CS; - } -} - -void -vc5_update_compiled_shaders(struct vc5_context *vc5, uint8_t prim_mode) -{ - vc5_update_compiled_fs(vc5, prim_mode); - vc5_update_compiled_vs(vc5, prim_mode); -} - -static uint32_t -fs_cache_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); -} - -static uint32_t -vs_cache_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); -} - -static bool -fs_cache_compare(const void *key1, const void *key2) -{ - return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; -} - -static bool -vs_cache_compare(const void *key1, const void *key2) -{ - return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; -} - -static void -delete_from_cache_if_matches(struct hash_table *ht, - struct vc5_compiled_shader **last_compile, - struct hash_entry *entry, - struct vc5_uncompiled_shader *so) -{ - const struct v3d_key *key = entry->key; - - if (key->shader_state == so) { - struct vc5_compiled_shader *shader = entry->data; - _mesa_hash_table_remove(ht, entry); - vc5_bo_unreference(&shader->bo); - - if (shader == *last_compile) - *last_compile = NULL; - - ralloc_free(shader); - } -} - -static void -vc5_shader_state_delete(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_uncompiled_shader *so = hwcso; - - struct hash_entry *entry; - 
hash_table_foreach(vc5->fs_cache, entry) { - delete_from_cache_if_matches(vc5->fs_cache, &vc5->prog.fs, - entry, so); - } - hash_table_foreach(vc5->vs_cache, entry) { - delete_from_cache_if_matches(vc5->vs_cache, &vc5->prog.vs, - entry, so); - } - - ralloc_free(so->base.ir.nir); - free(so); -} - -static void -vc5_fp_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->prog.bind_fs = hwcso; - vc5->dirty |= VC5_DIRTY_UNCOMPILED_FS; -} - -static void -vc5_vp_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->prog.bind_vs = hwcso; - vc5->dirty |= VC5_DIRTY_UNCOMPILED_VS; -} - -void -vc5_program_init(struct pipe_context *pctx) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - pctx->create_vs_state = vc5_shader_state_create; - pctx->delete_vs_state = vc5_shader_state_delete; - - pctx->create_fs_state = vc5_shader_state_create; - pctx->delete_fs_state = vc5_shader_state_delete; - - pctx->bind_fs_state = vc5_fp_state_bind; - pctx->bind_vs_state = vc5_vp_state_bind; - - vc5->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, - fs_cache_compare); - vc5->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, - vs_cache_compare); -} - -void -vc5_program_fini(struct pipe_context *pctx) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - struct hash_entry *entry; - hash_table_foreach(vc5->fs_cache, entry) { - struct vc5_compiled_shader *shader = entry->data; - vc5_bo_unreference(&shader->bo); - ralloc_free(shader); - _mesa_hash_table_remove(vc5->fs_cache, entry); - } - - hash_table_foreach(vc5->vs_cache, entry) { - struct vc5_compiled_shader *shader = entry->data; - vc5_bo_unreference(&shader->bo); - ralloc_free(shader); - _mesa_hash_table_remove(vc5->vs_cache, entry); - } -} diff --git a/src/gallium/drivers/vc5/vc5_query.c b/src/gallium/drivers/vc5/vc5_query.c deleted file mode 100644 index 9aa80cf536a..00000000000 --- a/src/gallium/drivers/vc5/vc5_query.c 
+++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * Gallium query object support. - * - * The HW has native support for occlusion queries, with the query result - * being loaded and stored by the TLB unit. From a SW perspective, we have to - * be careful to make sure that the jobs that need to be tracking queries are - * bracketed by the start and end of counting, even across FBO transitions. - * - * For the transform feedback PRIMITIVES_GENERATED/WRITTEN queries, we have to - * do the calculations in software at draw time. 
- */ - -#include "vc5_context.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" - -struct vc5_query -{ - enum pipe_query_type type; - struct vc5_bo *bo; - - uint32_t start, end; -}; - -static struct pipe_query * -vc5_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index) -{ - struct vc5_query *q = calloc(1, sizeof(*q)); - - q->type = query_type; - - /* Note that struct pipe_query isn't actually defined anywhere. */ - return (struct pipe_query *)q; -} - -static void -vc5_destroy_query(struct pipe_context *pctx, struct pipe_query *query) -{ - struct vc5_query *q = (struct vc5_query *)query; - - vc5_bo_unreference(&q->bo); - free(q); -} - -static boolean -vc5_begin_query(struct pipe_context *pctx, struct pipe_query *query) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_query *q = (struct vc5_query *)query; - - switch (q->type) { - case PIPE_QUERY_PRIMITIVES_GENERATED: - q->start = vc5->prims_generated; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - q->start = vc5->tf_prims_generated; - break; - default: - q->bo = vc5_bo_alloc(vc5->screen, 4096, "query"); - - uint32_t *map = vc5_bo_map(q->bo); - *map = 0; - vc5->current_oq = q->bo; - vc5->dirty |= VC5_DIRTY_OQ; - break; - } - - return true; -} - -static bool -vc5_end_query(struct pipe_context *pctx, struct pipe_query *query) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_query *q = (struct vc5_query *)query; - - switch (q->type) { - case PIPE_QUERY_PRIMITIVES_GENERATED: - q->end = vc5->prims_generated; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - q->end = vc5->tf_prims_generated; - break; - default: - vc5->current_oq = NULL; - vc5->dirty |= VC5_DIRTY_OQ; - break; - } - - return true; -} - -static boolean -vc5_get_query_result(struct pipe_context *pctx, struct pipe_query *query, - boolean wait, union pipe_query_result *vresult) -{ - struct vc5_query *q = (struct vc5_query *)query; - uint32_t result = 0; - - if (q->bo) { - /* XXX: Only flush the jobs using this 
BO. */ - vc5_flush(pctx); - - if (wait) { - if (!vc5_bo_wait(q->bo, 0, "query")) - return false; - } else { - if (!vc5_bo_wait(q->bo, ~0ull, "query")) - return false; - } - - /* XXX: Sum up per-core values. */ - uint32_t *map = vc5_bo_map(q->bo); - result = *map; - - vc5_bo_unreference(&q->bo); - } - - switch (q->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - vresult->u64 = result; - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - vresult->b = result != 0; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_PRIMITIVES_EMITTED: - vresult->u64 = q->end - q->start; - break; - default: - unreachable("unsupported query type"); - } - - return true; -} - -static void -vc5_set_active_query_state(struct pipe_context *pctx, boolean enable) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - vc5->active_queries = enable; - vc5->dirty |= VC5_DIRTY_OQ; - vc5->dirty |= VC5_DIRTY_STREAMOUT; -} - -void -vc5_query_init(struct pipe_context *pctx) -{ - pctx->create_query = vc5_create_query; - pctx->destroy_query = vc5_destroy_query; - pctx->begin_query = vc5_begin_query; - pctx->end_query = vc5_end_query; - pctx->get_query_result = vc5_get_query_result; - pctx->set_active_query_state = vc5_set_active_query_state; -} - diff --git a/src/gallium/drivers/vc5/vc5_rcl.c b/src/gallium/drivers/vc5/vc5_rcl.c deleted file mode 100644 index 7d32d9ad0ea..00000000000 --- a/src/gallium/drivers/vc5/vc5_rcl.c +++ /dev/null @@ -1,782 +0,0 @@ -/* - * Copyright © 2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - 
* The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_format.h" -#include "vc5_context.h" -#include "vc5_tiling.h" -#include "broadcom/common/v3d_macros.h" -#include "broadcom/cle/v3dx_pack.h" - -#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \ - PIPE_CLEAR_COLOR1 | \ - PIPE_CLEAR_COLOR2 | \ - PIPE_CLEAR_COLOR3) \ - -#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) - -/* The HW queues up the load until the tile coordinates show up, but can only - * track one at a time. If we need to do more than one load, then we need to - * flush out the previous load by emitting the tile coordinates and doing a - * dummy store. 
- */ -static void -flush_last_load(struct vc5_cl *cl) -{ - if (V3D_VERSION >= 40) - return; - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -} - -static void -load_general(struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, - uint32_t pipe_bit, uint32_t *loads_pending) -{ - struct vc5_surface *surf = vc5_surface(psurf); - bool separate_stencil = surf->separate_stencil && buffer == STENCIL; - if (separate_stencil) { - psurf = surf->separate_stencil; - surf = vc5_surface(psurf); - } - - struct vc5_resource *rsc = vc5_resource(psurf->texture); - - cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { - load.buffer_to_load = buffer; - load.address = cl_address(rsc->bo, surf->offset); - -#if V3D_VERSION >= 40 - load.memory_format = surf->tiling; - if (separate_stencil) - load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; - else - load.input_image_format = surf->format; - - if (surf->tiling == VC5_TILING_UIF_NO_XOR || - surf->tiling == VC5_TILING_UIF_XOR) { - load.height_in_ub_or_stride = - surf->padded_height_of_output_image_in_uif_blocks; - } else if (surf->tiling == VC5_TILING_RASTER) { - struct vc5_resource_slice *slice = - &rsc->slices[psurf->u.tex.level]; - load.height_in_ub_or_stride = slice->stride; - } - - if (psurf->texture->nr_samples > 1) - load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else - load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL loads -- need to load/store them to - * separate buffers for Z and stencil. 
- */ - assert(buffer != ZSTENCIL); - load.raw_mode = true; - load.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ - } - - *loads_pending &= ~pipe_bit; - if (*loads_pending) - flush_last_load(cl); -} - -static void -store_general(struct vc5_job *job, - struct vc5_cl *cl, struct pipe_surface *psurf, int buffer, - int pipe_bit, uint32_t *stores_pending, bool general_color_clear) -{ - struct vc5_surface *surf = vc5_surface(psurf); - bool separate_stencil = surf->separate_stencil && buffer == STENCIL; - if (separate_stencil) { - psurf = surf->separate_stencil; - surf = vc5_surface(psurf); - } - - *stores_pending &= ~pipe_bit; - bool last_store = !(*stores_pending); - - struct vc5_resource *rsc = vc5_resource(psurf->texture); - - rsc->writes++; - - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = buffer; - store.address = cl_address(rsc->bo, surf->offset); - -#if V3D_VERSION >= 40 - store.clear_buffer_being_stored = - ((job->cleared & pipe_bit) && - (general_color_clear || - !(pipe_bit & PIPE_CLEAR_COLOR_BUFFERS))); - - if (separate_stencil) - store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; - else - store.output_image_format = surf->format; - - store.memory_format = surf->tiling; - - if (surf->tiling == VC5_TILING_UIF_NO_XOR || - surf->tiling == VC5_TILING_UIF_XOR) { - store.height_in_ub_or_stride = - surf->padded_height_of_output_image_in_uif_blocks; - } else if (surf->tiling == VC5_TILING_RASTER) { - struct vc5_resource_slice *slice = - &rsc->slices[psurf->u.tex.level]; - store.height_in_ub_or_stride = slice->stride; - } - - if (psurf->texture->nr_samples > 1) - store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else - store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL stores -- need to load/store them to - * separate buffers for Z and stencil. 
- */ - assert(buffer != ZSTENCIL); - store.raw_mode = true; - if (!last_store) { - store.disable_colour_buffers_clear_on_write = true; - store.disable_z_buffer_clear_on_write = true; - store.disable_stencil_buffer_clear_on_write = true; - } else { - store.disable_colour_buffers_clear_on_write = - !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && - general_color_clear && - (job->cleared & pipe_bit))); - store.disable_z_buffer_clear_on_write = - !(job->cleared & PIPE_CLEAR_DEPTH); - store.disable_stencil_buffer_clear_on_write = - !(job->cleared & PIPE_CLEAR_STENCIL); - } - store.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ - } - - /* There must be a TILE_COORDINATES_IMPLICIT between each store. */ - if (V3D_VERSION < 40 && !last_store) { - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - } -} - -static int -zs_buffer_from_pipe_bits(int pipe_clear_bits) -{ - switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) { - case PIPE_CLEAR_DEPTHSTENCIL: - return ZSTENCIL; - case PIPE_CLEAR_DEPTH: - return Z; - case PIPE_CLEAR_STENCIL: - return STENCIL; - default: - return NONE; - } -} - -static void -vc5_rcl_emit_loads(struct vc5_job *job, struct vc5_cl *cl) -{ - uint32_t loads_pending = job->resolve & ~job->cleared; - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - uint32_t bit = PIPE_CLEAR_COLOR0 << i; - if (!(loads_pending & bit)) - continue; - - struct pipe_surface *psurf = job->cbufs[i]; - if (!psurf || (V3D_VERSION < 40 && - psurf->texture->nr_samples <= 1)) { - continue; - } - - load_general(cl, psurf, RENDER_TARGET_0 + i, - bit, &loads_pending); - } - - if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && - (V3D_VERSION >= 40 || - (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { - struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); - - if (rsc->separate_stencil && - (loads_pending & PIPE_CLEAR_STENCIL)) { - load_general(cl, job->zsbuf, - STENCIL, - PIPE_CLEAR_STENCIL, - 
&loads_pending); - } - - if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { - load_general(cl, job->zsbuf, - zs_buffer_from_pipe_bits(loads_pending), - loads_pending & PIPE_CLEAR_DEPTHSTENCIL, - &loads_pending); - } - } - -#if V3D_VERSION < 40 - /* The initial reload will be queued until we get the - * tile coordinates. - */ - if (loads_pending) { - cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) { - load.disable_colour_buffer_load = - (~loads_pending & - PIPE_CLEAR_COLOR_BUFFERS) >> - PIPE_FIRST_COLOR_BUFFER_BIT; - load.enable_z_load = - loads_pending & PIPE_CLEAR_DEPTH; - load.enable_stencil_load = - loads_pending & PIPE_CLEAR_STENCIL; - } - } -#else /* V3D_VERSION >= 40 */ - assert(!loads_pending); - cl_emit(cl, END_OF_LOADS, end); -#endif -} - -static void -vc5_rcl_emit_stores(struct vc5_job *job, struct vc5_cl *cl) -{ - MAYBE_UNUSED bool needs_color_clear = job->cleared & PIPE_CLEAR_COLOR_BUFFERS; - MAYBE_UNUSED bool needs_z_clear = job->cleared & PIPE_CLEAR_DEPTH; - MAYBE_UNUSED bool needs_s_clear = job->cleared & PIPE_CLEAR_STENCIL; - - /* For clearing color in a TLB general on V3D 3.3: - * - * - NONE buffer store clears all TLB color buffers. - * - color buffer store clears just the TLB color buffer being stored. - * - Z/S buffers store may not clear the TLB color buffer. - * - * And on V3D 4.1, we only have one flag for "clear the buffer being - * stored" in the general packet, and a separate packet to clear all - * color TLB buffers. - * - * As a result, we only bother flagging TLB color clears in a general - * packet when we don't have to emit a separate packet to clear all - * TLB color buffers. - */ - bool general_color_clear = (needs_color_clear && - (job->cleared & PIPE_CLEAR_COLOR_BUFFERS) == - (job->resolve & PIPE_CLEAR_COLOR_BUFFERS)); - - uint32_t stores_pending = job->resolve; - - /* For V3D 4.1, use general stores for all TLB stores. - * - * For V3D 3.3, we only use general stores to do raw stores for any - * MSAA surfaces. 
These output UIF tiled images where each 4x MSAA - * pixel is a 2x2 quad, and the format will be that of the - * internal_type/internal_bpp, rather than the format from GL's - * perspective. Non-MSAA surfaces will use - * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. - */ - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - uint32_t bit = PIPE_CLEAR_COLOR0 << i; - if (!(job->resolve & bit)) - continue; - - struct pipe_surface *psurf = job->cbufs[i]; - if (!psurf || - (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) { - continue; - } - - store_general(job, cl, psurf, RENDER_TARGET_0 + i, bit, - &stores_pending, general_color_clear); - } - - if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && - !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) { - struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture); - if (rsc->separate_stencil) { - if (job->resolve & PIPE_CLEAR_DEPTH) { - store_general(job, cl, job->zsbuf, Z, - PIPE_CLEAR_DEPTH, - &stores_pending, - general_color_clear); - } - - if (job->resolve & PIPE_CLEAR_STENCIL) { - store_general(job, cl, job->zsbuf, STENCIL, - PIPE_CLEAR_STENCIL, - &stores_pending, - general_color_clear); - } - } else { - store_general(job, cl, job->zsbuf, - zs_buffer_from_pipe_bits(job->resolve), - job->resolve & PIPE_CLEAR_DEPTHSTENCIL, - &stores_pending, general_color_clear); - } - } - - if (stores_pending) { -#if V3D_VERSION < 40 - cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) { - - store.disable_color_buffer_write = - (~stores_pending >> - PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf; - store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH; - store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL; - - /* Note that when set this will clear all of the color - * buffers. 
- */ - store.disable_colour_buffers_clear_on_write = - !needs_color_clear; - store.disable_z_buffer_clear_on_write = - !needs_z_clear; - store.disable_stencil_buffer_clear_on_write = - !needs_s_clear; - }; -#else /* V3D_VERSION >= 40 */ - unreachable("All color buffers should have been stored."); -#endif /* V3D_VERSION >= 40 */ - } else if (needs_color_clear && !general_color_clear) { - /* If we didn't do our color clears in the general packet, - * then emit a packet to clear all the TLB color buffers now. - */ -#if V3D_VERSION < 40 - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -#else /* V3D_VERSION >= 40 */ - cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_all_render_targets = true; - } -#endif /* V3D_VERSION >= 40 */ - } -} - -static void -vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job, int last_cbuf) -{ - /* Emit the generic list in our indirect state -- the rcl will just - * have pointers into it. - */ - struct vc5_cl *cl = &job->indirect; - vc5_cl_ensure_space(cl, 200, 1); - struct vc5_cl_reloc tile_list_start = cl_get_address(cl); - - if (V3D_VERSION >= 40) { - /* V3D 4.x only requires a single tile coordinates, and - * END_OF_LOADS switches us between loading and rendering. - */ - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - } - - vc5_rcl_emit_loads(job, cl); - - if (V3D_VERSION < 40) { - /* Tile Coordinates triggers the last reload and sets where - * the stores go. There must be one per store packet. - */ - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - } - - /* The binner starts out writing tiles assuming that the initial mode - * is triangles, so make sure that's the case. 
- */ - cl_emit(cl, PRIMITIVE_LIST_FORMAT, fmt) { - fmt.data_type = LIST_INDEXED; - fmt.primitive_type = LIST_TRIANGLES; - } - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - vc5_rcl_emit_stores(job, cl); - -#if V3D_VERSION >= 40 - cl_emit(cl, END_OF_TILE_MARKER, end); -#endif - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = cl_get_address(cl); - } -} - -#if V3D_VERSION >= 40 -static void -v3d_setup_render_target(struct vc5_job *job, int cbuf, - uint32_t *rt_bpp, uint32_t *rt_type, uint32_t *rt_clamp) -{ - if (!job->cbufs[cbuf]) - return; - - struct vc5_surface *surf = vc5_surface(job->cbufs[cbuf]); - *rt_bpp = surf->internal_bpp; - *rt_type = surf->internal_type; - *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; -} - -#else /* V3D_VERSION < 40 */ - -static void -v3d_emit_z_stencil_config(struct vc5_job *job, struct vc5_surface *surf, - struct vc5_resource *rsc, bool is_separate_stencil) -{ - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) { - zs.address = cl_address(rsc->bo, surf->offset); - - if (!is_separate_stencil) { - zs.internal_type = surf->internal_type; - zs.output_image_format = surf->format; - } else { - zs.z_stencil_id = 1; /* Separate stencil */ - } - - zs.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; - - assert(surf->tiling != VC5_TILING_RASTER); - zs.memory_format = surf->tiling; - } - - if (job->resolve & (is_separate_stencil ? - PIPE_CLEAR_STENCIL : - PIPE_CLEAR_DEPTHSTENCIL)) { - rsc->writes++; - } -} -#endif /* V3D_VERSION < 40 */ - -#define div_round_up(a, b) (((a) + (b) - 1) / b) - -void -v3dX(emit_rcl)(struct vc5_job *job) -{ - /* The RCL list should be empty. 
*/ - assert(!job->rcl.bo); - - vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 * - cl_packet_length(SUPERTILE_COORDINATES)); - job->submit.rcl_start = job->rcl.bo->offset; - vc5_job_add_bo(job, job->rcl.bo); - - int nr_cbufs = 0; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { - if (job->cbufs[i]) - nr_cbufs = i + 1; - } - - /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION - * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are - * optional updates to the previous HW state. - */ - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION, - config) { -#if V3D_VERSION < 40 - config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH; - config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL; -#else /* V3D_VERSION >= 40 */ - if (job->zsbuf) { - struct vc5_surface *surf = vc5_surface(job->zsbuf); - config.internal_depth_type = surf->internal_type; - } -#endif /* V3D_VERSION >= 40 */ - - /* XXX: Early D/S clear */ - - switch (job->first_ez_state) { - case VC5_EZ_UNDECIDED: - case VC5_EZ_LT_LE: - config.early_z_disable = false; - config.early_z_test_and_update_direction = - EARLY_Z_DIRECTION_LT_LE; - break; - case VC5_EZ_GT_GE: - config.early_z_disable = false; - config.early_z_test_and_update_direction = - EARLY_Z_DIRECTION_GT_GE; - break; - case VC5_EZ_DISABLED: - config.early_z_disable = true; - } - - config.image_width_pixels = job->draw_width; - config.image_height_pixels = job->draw_height; - - config.number_of_render_targets_minus_1 = - MAX2(nr_cbufs, 1) - 1; - - config.multisample_mode_4x = job->msaa; - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } - - for (int i = 0; i < nr_cbufs; i++) { - struct pipe_surface *psurf = job->cbufs[i]; - if (!psurf) - continue; - struct vc5_surface *surf = vc5_surface(psurf); - struct vc5_resource *rsc = vc5_resource(psurf->texture); - - MAYBE_UNUSED uint32_t config_pad = 0; - uint32_t clear_pad = 0; - - /* XXX: Set the pad for raster. 
*/ - if (surf->tiling == VC5_TILING_UIF_NO_XOR || - surf->tiling == VC5_TILING_UIF_XOR) { - int uif_block_height = vc5_utile_height(rsc->cpp) * 2; - uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) / - uif_block_height); - if (surf->padded_height_of_output_image_in_uif_blocks - - implicit_padded_height < 15) { - config_pad = (surf->padded_height_of_output_image_in_uif_blocks - - implicit_padded_height); - } else { - config_pad = 15; - clear_pad = surf->padded_height_of_output_image_in_uif_blocks; - } - } - -#if V3D_VERSION < 40 - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { - rt.address = cl_address(rsc->bo, surf->offset); - rt.internal_type = surf->internal_type; - rt.output_image_format = surf->format; - rt.memory_format = surf->tiling; - rt.internal_bpp = surf->internal_bpp; - rt.render_target_number = i; - rt.pad = config_pad; - - if (job->resolve & PIPE_CLEAR_COLOR0 << i) - rsc->writes++; - } -#endif /* V3D_VERSION < 40 */ - - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1, - clear) { - clear.clear_color_low_32_bits = job->clear_color[i][0]; - clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff; - clear.render_target_number = i; - }; - - if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) { - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2, - clear) { - clear.clear_color_mid_low_32_bits = - ((job->clear_color[i][1] >> 24) | - (job->clear_color[i][2] << 8)); - clear.clear_color_mid_high_24_bits = - ((job->clear_color[i][2] >> 24) | - ((job->clear_color[i][3] & 0xffff) << 8)); - clear.render_target_number = i; - }; - } - - if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3, - clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16; - clear.render_target_number = i; - }; - } - } - -#if 
V3D_VERSION >= 40 - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) { - v3d_setup_render_target(job, 0, - &rt.render_target_0_internal_bpp, - &rt.render_target_0_internal_type, - &rt.render_target_0_clamp); - v3d_setup_render_target(job, 1, - &rt.render_target_1_internal_bpp, - &rt.render_target_1_internal_type, - &rt.render_target_1_clamp); - v3d_setup_render_target(job, 2, - &rt.render_target_2_internal_bpp, - &rt.render_target_2_internal_type, - &rt.render_target_2_clamp); - v3d_setup_render_target(job, 3, - &rt.render_target_3_internal_bpp, - &rt.render_target_3_internal_type, - &rt.render_target_3_clamp); - } -#endif - -#if V3D_VERSION < 40 - /* TODO: Don't bother emitting if we don't load/clear Z/S. */ - if (job->zsbuf) { - struct pipe_surface *psurf = job->zsbuf; - struct vc5_surface *surf = vc5_surface(psurf); - struct vc5_resource *rsc = vc5_resource(psurf->texture); - - v3d_emit_z_stencil_config(job, surf, rsc, false); - - /* Emit the separate stencil packet if we have a resource for - * it. The HW will only load/store this buffer if the - * Z/Stencil config doesn't have stencil in its format. - */ - if (surf->separate_stencil) { - v3d_emit_z_stencil_config(job, - vc5_surface(surf->separate_stencil), - rsc->separate_stencil, true); - } - } -#endif /* V3D_VERSION < 40 */ - - /* Ends rendering mode config. */ - cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES, - clear) { - clear.z_clear_value = job->clear_z; - clear.stencil_vg_mask_clear_value = job->clear_s; - }; - - /* Always set initial block size before the first branch, which needs - * to match the value from binning mode config. 
- */ - cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { - init.use_auto_chained_tile_lists = true; - init.size_of_first_block_in_chained_tile_lists = - TILE_ALLOCATION_BLOCK_SIZE_64B; - } - - uint32_t supertile_w = 1, supertile_h = 1; - - /* If doing multicore binning, we would need to initialize each core's - * tile list here. - */ - cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { - list.address = cl_address(job->tile_alloc, 0); - } - - cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) { - uint32_t frame_w_in_supertiles, frame_h_in_supertiles; - const uint32_t max_supertiles = 256; - - /* Size up our supertiles until we get under the limit. */ - for (;;) { - frame_w_in_supertiles = div_round_up(job->draw_tiles_x, - supertile_w); - frame_h_in_supertiles = div_round_up(job->draw_tiles_y, - supertile_h); - if (frame_w_in_supertiles * frame_h_in_supertiles < - max_supertiles) { - break; - } - - if (supertile_w < supertile_h) - supertile_w++; - else - supertile_h++; - } - - config.total_frame_width_in_tiles = job->draw_tiles_x; - config.total_frame_height_in_tiles = job->draw_tiles_y; - - config.supertile_width_in_tiles_minus_1 = supertile_w - 1; - config.supertile_height_in_tiles_minus_1 = supertile_h - 1; - - config.total_frame_width_in_supertiles = frame_w_in_supertiles; - config.total_frame_height_in_supertiles = frame_h_in_supertiles; - } - - /* Start by clearing the tile buffer. */ - cl_emit(&job->rcl, TILE_COORDINATES, coords) { - coords.tile_column_number = 0; - coords.tile_row_number = 0; - } - - /* Emit an initial clear of the tile buffers. This is necessary for - * any buffers that should be cleared (since clearing normally happens - * at the *end* of the generic tile list), but it's also nice to clear - * everything so the first tile doesn't inherit any contents from some - * previous frame. - * - * Also, implement the GFXH-1742 workaround. 
There's a race in the HW - * between the RCL updating the TLB's internal type/size and the - * spawning of the QPU instances using the TLB's current internal - * type/size. To make sure the QPUs get the right state,, we need 1 - * dummy store in between internal type/size changes on V3D 3.x, and 2 - * dummy stores on 4.x. - */ -#if V3D_VERSION < 40 - cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -#else - for (int i = 0; i < 2; i++) { - if (i > 0) - cl_emit(&job->rcl, TILE_COORDINATES, coords); - cl_emit(&job->rcl, END_OF_LOADS, end); - cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - if (i == 0) { - cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = true; - clear.clear_all_render_targets = true; - } - } - cl_emit(&job->rcl, END_OF_TILE_MARKER, end); - } -#endif - - cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush); - - vc5_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); - - cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem); - - /* XXX: Use Morton order */ - uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; - uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; - uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; - uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels; - - uint32_t max_x_supertile = 0; - uint32_t max_y_supertile = 0; - if (job->draw_max_x != 0 && job->draw_max_y != 0) { - max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels; - max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels; - } - - for (int y = min_y_supertile; y <= max_y_supertile; y++) { - for (int x = min_x_supertile; x <= max_x_supertile; x++) { - cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) { - coords.column_number_in_supertiles = x; - coords.row_number_in_supertiles = y; - } - } - } - - cl_emit(&job->rcl, END_OF_RENDERING, end); -} diff --git a/src/gallium/drivers/vc5/vc5_resource.c 
b/src/gallium/drivers/vc5/vc5_resource.c deleted file mode 100644 index 3c2ed270770..00000000000 --- a/src/gallium/drivers/vc5/vc5_resource.c +++ /dev/null @@ -1,914 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "pipe/p_defines.h" -#include "util/u_blit.h" -#include "util/u_memory.h" -#include "util/u_format.h" -#include "util/u_inlines.h" -#include "util/u_surface.h" -#include "util/u_transfer_helper.h" -#include "util/u_upload_mgr.h" -#include "util/u_format_zs.h" - -#include "drm_fourcc.h" -#include "vc5_screen.h" -#include "vc5_context.h" -#include "vc5_resource.h" -#include "vc5_tiling.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" - -static void -vc5_debug_resource_layout(struct vc5_resource *rsc, const char *caller) -{ - if (!(V3D_DEBUG & V3D_DEBUG_SURFACE)) - return; - - struct pipe_resource *prsc = &rsc->base; - - if (prsc->target == PIPE_BUFFER) { - fprintf(stderr, - "rsc %s %p (format %s), %dx%d buffer @0x%08x-0x%08x\n", - caller, rsc, - util_format_short_name(prsc->format), - prsc->width0, prsc->height0, - rsc->bo->offset, - rsc->bo->offset + rsc->bo->size - 1); - return; - } - - static const char *const tiling_descriptions[] = { - [VC5_TILING_RASTER] = "R", - [VC5_TILING_LINEARTILE] = "LT", - [VC5_TILING_UBLINEAR_1_COLUMN] = "UB1", - [VC5_TILING_UBLINEAR_2_COLUMN] = "UB2", - [VC5_TILING_UIF_NO_XOR] = "UIF", - [VC5_TILING_UIF_XOR] = "UIF^", - }; - - for (int i = 0; i <= prsc->last_level; i++) { - struct vc5_resource_slice *slice = &rsc->slices[i]; - - int level_width = slice->stride / rsc->cpp; - int level_height = slice->padded_height; - int level_depth = - u_minify(util_next_power_of_two(prsc->depth0), i); - - fprintf(stderr, - "rsc %s %p (format %s), %dx%d: " - "level %d (%s) %dx%dx%d -> %dx%dx%d, stride %d@0x%08x\n", - caller, rsc, - util_format_short_name(prsc->format), - prsc->width0, prsc->height0, - i, tiling_descriptions[slice->tiling], - u_minify(prsc->width0, i), - u_minify(prsc->height0, i), - u_minify(prsc->depth0, i), - level_width, - level_height, - level_depth, - slice->stride, - rsc->bo->offset + slice->offset); - } -} - -static bool -vc5_resource_bo_alloc(struct vc5_resource *rsc) -{ - struct pipe_resource *prsc = 
&rsc->base; - struct pipe_screen *pscreen = prsc->screen; - struct vc5_bo *bo; - - bo = vc5_bo_alloc(vc5_screen(pscreen), rsc->size, "resource"); - if (bo) { - vc5_bo_unreference(&rsc->bo); - rsc->bo = bo; - vc5_debug_resource_layout(rsc, "alloc"); - return true; - } else { - return false; - } -} - -static void -vc5_resource_transfer_unmap(struct pipe_context *pctx, - struct pipe_transfer *ptrans) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_transfer *trans = vc5_transfer(ptrans); - - if (trans->map) { - struct vc5_resource *rsc = vc5_resource(ptrans->resource); - struct vc5_resource_slice *slice = &rsc->slices[ptrans->level]; - - if (ptrans->usage & PIPE_TRANSFER_WRITE) { - for (int z = 0; z < ptrans->box.depth; z++) { - void *dst = rsc->bo->map + - vc5_layer_offset(&rsc->base, - ptrans->level, - ptrans->box.z + z); - vc5_store_tiled_image(dst, - slice->stride, - (trans->map + - ptrans->stride * - ptrans->box.height * z), - ptrans->stride, - slice->tiling, rsc->cpp, - slice->padded_height, - &ptrans->box); - } - } - free(trans->map); - } - - pipe_resource_reference(&ptrans->resource, NULL); - slab_free(&vc5->transfer_pool, ptrans); -} - -static void * -vc5_resource_transfer_map(struct pipe_context *pctx, - struct pipe_resource *prsc, - unsigned level, unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **pptrans) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_resource *rsc = vc5_resource(prsc); - struct vc5_transfer *trans; - struct pipe_transfer *ptrans; - enum pipe_format format = prsc->format; - char *buf; - - /* MSAA maps should have been handled by u_transfer_helper. */ - assert(prsc->nr_samples <= 1); - - /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is - * being mapped. 
- */ - if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && - !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && - !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) && - prsc->last_level == 0 && - prsc->width0 == box->width && - prsc->height0 == box->height && - prsc->depth0 == box->depth && - prsc->array_size == 1 && - rsc->bo->private) { - usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; - } - - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { - if (vc5_resource_bo_alloc(rsc)) { - /* If it might be bound as one of our vertex buffers - * or UBOs, make sure we re-emit vertex buffer state - * or uniforms. - */ - if (prsc->bind & PIPE_BIND_VERTEX_BUFFER) - vc5->dirty |= VC5_DIRTY_VTXBUF; - if (prsc->bind & PIPE_BIND_CONSTANT_BUFFER) - vc5->dirty |= VC5_DIRTY_CONSTBUF; - } else { - /* If we failed to reallocate, flush users so that we - * don't violate any syncing requirements. - */ - vc5_flush_jobs_reading_resource(vc5, prsc); - } - } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { - /* If we're writing and the buffer is being used by the CL, we - * have to flush the CL first. If we're only reading, we need - * to flush if the CL has written our buffer. - */ - if (usage & PIPE_TRANSFER_WRITE) - vc5_flush_jobs_reading_resource(vc5, prsc); - else - vc5_flush_jobs_writing_resource(vc5, prsc); - } - - if (usage & PIPE_TRANSFER_WRITE) { - rsc->writes++; - rsc->initialized_buffers = ~0; - } - - trans = slab_alloc(&vc5->transfer_pool); - if (!trans) - return NULL; - - /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ - - /* slab_alloc_st() doesn't zero: */ - memset(trans, 0, sizeof(*trans)); - ptrans = &trans->base; - - pipe_resource_reference(&ptrans->resource, prsc); - ptrans->level = level; - ptrans->usage = usage; - ptrans->box = *box; - - /* Note that the current kernel implementation is synchronous, so no - * need to do syncing stuff here yet. 
- */ - - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) - buf = vc5_bo_map_unsynchronized(rsc->bo); - else - buf = vc5_bo_map(rsc->bo); - if (!buf) { - fprintf(stderr, "Failed to map bo\n"); - goto fail; - } - - *pptrans = ptrans; - - /* Our load/store routines work on entire compressed blocks. */ - ptrans->box.x /= util_format_get_blockwidth(format); - ptrans->box.y /= util_format_get_blockheight(format); - ptrans->box.width = DIV_ROUND_UP(ptrans->box.width, - util_format_get_blockwidth(format)); - ptrans->box.height = DIV_ROUND_UP(ptrans->box.height, - util_format_get_blockheight(format)); - - struct vc5_resource_slice *slice = &rsc->slices[level]; - if (rsc->tiled) { - /* No direct mappings of tiled, since we need to manually - * tile/untile. - */ - if (usage & PIPE_TRANSFER_MAP_DIRECTLY) - return NULL; - - ptrans->stride = ptrans->box.width * rsc->cpp; - ptrans->layer_stride = ptrans->stride * ptrans->box.height; - - trans->map = malloc(ptrans->layer_stride * ptrans->box.depth); - - if (usage & PIPE_TRANSFER_READ) { - for (int z = 0; z < ptrans->box.depth; z++) { - void *src = rsc->bo->map + - vc5_layer_offset(&rsc->base, - ptrans->level, - ptrans->box.z + z); - vc5_load_tiled_image((trans->map + - ptrans->stride * - ptrans->box.height * z), - ptrans->stride, - src, - slice->stride, - slice->tiling, rsc->cpp, - slice->padded_height, - &ptrans->box); - } - } - return trans->map; - } else { - ptrans->stride = slice->stride; - ptrans->layer_stride = ptrans->stride; - - return buf + slice->offset + - ptrans->box.y * ptrans->stride + - ptrans->box.x * rsc->cpp + - ptrans->box.z * rsc->cube_map_stride; - } - - -fail: - vc5_resource_transfer_unmap(pctx, ptrans); - return NULL; -} - -static void -vc5_resource_destroy(struct pipe_screen *pscreen, - struct pipe_resource *prsc) -{ - struct vc5_resource *rsc = vc5_resource(prsc); - - vc5_bo_unreference(&rsc->bo); - free(rsc); -} - -static boolean -vc5_resource_get_handle(struct pipe_screen *pscreen, - struct pipe_context *pctx, 
- struct pipe_resource *prsc, - struct winsys_handle *whandle, - unsigned usage) -{ - struct vc5_resource *rsc = vc5_resource(prsc); - struct vc5_bo *bo = rsc->bo; - - whandle->stride = rsc->slices[0].stride; - - /* If we're passing some reference to our BO out to some other part of - * the system, then we can't do any optimizations about only us being - * the ones seeing it (like BO caching). - */ - bo->private = false; - - switch (whandle->type) { - case DRM_API_HANDLE_TYPE_SHARED: - return vc5_bo_flink(bo, &whandle->handle); - case DRM_API_HANDLE_TYPE_KMS: - whandle->handle = bo->handle; - return TRUE; - case DRM_API_HANDLE_TYPE_FD: - whandle->handle = vc5_bo_get_dmabuf(bo); - return whandle->handle != -1; - } - - return FALSE; -} - -#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE) -#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1) -#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) -#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5) - -/** - * Computes the HW's UIFblock padding for a given height/cpp. - * - * The goal of the padding is to keep pages of the same color (bank number) at - * least half a page away from each other vertically when crossing between - * between columns of UIF blocks. - */ -static uint32_t -vc5_get_ub_pad(struct vc5_resource *rsc, uint32_t height) -{ - uint32_t utile_h = vc5_utile_height(rsc->cpp); - uint32_t uif_block_h = utile_h * 2; - uint32_t height_ub = height / uif_block_h; - - uint32_t height_offset_in_pc = height_ub % PAGE_CACHE_UB_ROWS; - - /* For the perfectly-aligned-for-UIF-XOR case, don't add any pad. */ - if (height_offset_in_pc == 0) - return 0; - - /* Try padding up to where we're offset by at least half a page. */ - if (height_offset_in_pc < PAGE_UB_ROWS_TIMES_1_5) { - /* If we fit entirely in the page cache, don't pad. 
*/ - if (height_ub < PAGE_CACHE_UB_ROWS) - return 0; - else - return PAGE_UB_ROWS_TIMES_1_5 - height_offset_in_pc; - } - - /* If we're close to being aligned to page cache size, then round up - * and rely on XOR. - */ - if (height_offset_in_pc > PAGE_CACHE_MINUS_1_5_UB_ROWS) - return PAGE_CACHE_UB_ROWS - height_offset_in_pc; - - /* Otherwise, we're far enough away (top and bottom) to not need any - * padding. - */ - return 0; -} - -static void -vc5_setup_slices(struct vc5_resource *rsc) -{ - struct pipe_resource *prsc = &rsc->base; - uint32_t width = prsc->width0; - uint32_t height = prsc->height0; - uint32_t depth = prsc->depth0; - /* Note that power-of-two padding is based on level 1. These are not - * equivalent to just util_next_power_of_two(dimension), because at a - * level 0 dimension of 9, the level 1 power-of-two padded value is 4, - * not 8. - */ - uint32_t pot_width = 2 * util_next_power_of_two(u_minify(width, 1)); - uint32_t pot_height = 2 * util_next_power_of_two(u_minify(height, 1)); - uint32_t pot_depth = 2 * util_next_power_of_two(u_minify(depth, 1)); - uint32_t offset = 0; - uint32_t utile_w = vc5_utile_width(rsc->cpp); - uint32_t utile_h = vc5_utile_height(rsc->cpp); - uint32_t uif_block_w = utile_w * 2; - uint32_t uif_block_h = utile_h * 2; - uint32_t block_width = util_format_get_blockwidth(prsc->format); - uint32_t block_height = util_format_get_blockheight(prsc->format); - bool msaa = prsc->nr_samples > 1; - /* MSAA textures/renderbuffers are always laid out as single-level - * UIF. 
- */ - bool uif_top = msaa; - - for (int i = prsc->last_level; i >= 0; i--) { - struct vc5_resource_slice *slice = &rsc->slices[i]; - - uint32_t level_width, level_height, level_depth; - if (i < 2) { - level_width = u_minify(width, i); - level_height = u_minify(height, i); - } else { - level_width = u_minify(pot_width, i); - level_height = u_minify(pot_height, i); - } - if (i < 1) - level_depth = u_minify(depth, i); - else - level_depth = u_minify(pot_depth, i); - - if (msaa) { - level_width *= 2; - level_height *= 2; - } - - level_width = DIV_ROUND_UP(level_width, block_width); - level_height = DIV_ROUND_UP(level_height, block_height); - - if (!rsc->tiled) { - slice->tiling = VC5_TILING_RASTER; - if (prsc->target == PIPE_TEXTURE_1D) - level_width = align(level_width, 64 / rsc->cpp); - } else { - if ((i != 0 || !uif_top) && - (level_width <= utile_w || - level_height <= utile_h)) { - slice->tiling = VC5_TILING_LINEARTILE; - level_width = align(level_width, utile_w); - level_height = align(level_height, utile_h); - } else if ((i != 0 || !uif_top) && - level_width <= uif_block_w) { - slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; - level_width = align(level_width, uif_block_w); - level_height = align(level_height, uif_block_h); - } else if ((i != 0 || !uif_top) && - level_width <= 2 * uif_block_w) { - slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; - level_width = align(level_width, 2 * uif_block_w); - level_height = align(level_height, uif_block_h); - } else { - /* We align the width to a 4-block column of - * UIF blocks, but we only align height to UIF - * blocks. 
- */ - level_width = align(level_width, - 4 * uif_block_w); - level_height = align(level_height, - uif_block_h); - - slice->ub_pad = vc5_get_ub_pad(rsc, - level_height); - level_height += slice->ub_pad * uif_block_h; - - /* If the padding set us to to be aligned to - * the page cache size, then the HW will use - * the XOR bit on odd columns to get us - * perfectly misaligned - */ - if ((level_height / uif_block_h) % - (VC5_PAGE_CACHE_SIZE / - VC5_UIFBLOCK_ROW_SIZE) == 0) { - slice->tiling = VC5_TILING_UIF_XOR; - } else { - slice->tiling = VC5_TILING_UIF_NO_XOR; - } - } - } - - slice->offset = offset; - slice->stride = level_width * rsc->cpp; - slice->padded_height = level_height; - slice->size = level_height * slice->stride; - - uint32_t slice_total_size = slice->size * level_depth; - - /* The HW aligns level 1's base to a page if any of level 1 or - * below could be UIF XOR. The lower levels then inherit the - * alignment for as long as necesary, thanks to being power of - * two aligned. - */ - if (i == 1 && - level_width > 4 * uif_block_w && - level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) { - slice_total_size = align(slice_total_size, - VC5_UIFCFG_PAGE_SIZE); - } - - offset += slice_total_size; - - } - rsc->size = offset; - - /* UIF/UBLINEAR levels need to be aligned to UIF-blocks, and LT only - * needs to be aligned to utile boundaries. Since tiles are laid out - * from small to big in memory, we need to align the later UIF slices - * to UIF blocks, if they were preceded by non-UIF-block-aligned LT - * slices. - * - * We additionally align to 4k, which improves UIF XOR performance. 
- */ - uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) - - rsc->slices[0].offset); - if (page_align_offset) { - rsc->size += page_align_offset; - for (int i = 0; i <= prsc->last_level; i++) - rsc->slices[i].offset += page_align_offset; - } - - /* Arrays and cube textures have a stride which is the distance from - * one full mipmap tree to the next (64b aligned). For 3D textures, - * we need to program the stride between slices of miplevel 0. - */ - if (prsc->target != PIPE_TEXTURE_3D) { - rsc->cube_map_stride = align(rsc->slices[0].offset + - rsc->slices[0].size, 64); - rsc->size += rsc->cube_map_stride * (prsc->array_size - 1); - } else { - rsc->cube_map_stride = rsc->slices[0].size; - } -} - -uint32_t -vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer) -{ - struct vc5_resource *rsc = vc5_resource(prsc); - struct vc5_resource_slice *slice = &rsc->slices[level]; - - if (prsc->target == PIPE_TEXTURE_3D) - return slice->offset + layer * slice->size; - else - return slice->offset + layer * rsc->cube_map_stride; -} - -static struct vc5_resource * -vc5_resource_setup(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_resource *rsc = CALLOC_STRUCT(vc5_resource); - if (!rsc) - return NULL; - struct pipe_resource *prsc = &rsc->base; - - *prsc = *tmpl; - - pipe_reference_init(&prsc->reference, 1); - prsc->screen = pscreen; - - if (prsc->nr_samples <= 1 || - screen->devinfo.ver >= 40 || - util_format_is_depth_or_stencil(prsc->format)) { - rsc->cpp = util_format_get_blocksize(prsc->format); - if (screen->devinfo.ver < 40 && prsc->nr_samples > 1) - rsc->cpp *= prsc->nr_samples; - } else { - assert(vc5_rt_format_supported(&screen->devinfo, prsc->format)); - uint32_t output_image_format = - vc5_get_rt_format(&screen->devinfo, prsc->format); - uint32_t internal_type; - uint32_t internal_bpp; - vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, - 
output_image_format, - &internal_type, - &internal_bpp); - switch (internal_bpp) { - case V3D_INTERNAL_BPP_32: - rsc->cpp = 4; - break; - case V3D_INTERNAL_BPP_64: - rsc->cpp = 8; - break; - case V3D_INTERNAL_BPP_128: - rsc->cpp = 16; - break; - } - } - - assert(rsc->cpp); - - return rsc; -} - -static bool -find_modifier(uint64_t needle, const uint64_t *haystack, int count) -{ - int i; - - for (i = 0; i < count; i++) { - if (haystack[i] == needle) - return true; - } - - return false; -} - -static struct pipe_resource * -vc5_resource_create_with_modifiers(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl, - const uint64_t *modifiers, - int count) -{ - bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); - struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); - struct pipe_resource *prsc = &rsc->base; - /* Use a tiled layout if we can, for better 3D performance. */ - bool should_tile = true; - - /* VBOs/PBOs are untiled (and 1 height). */ - if (tmpl->target == PIPE_BUFFER) - should_tile = false; - - /* Cursors are always linear, and the user can request linear as well. - */ - if (tmpl->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR)) - should_tile = false; - - /* 1D and 1D_ARRAY textures are always raster-order. */ - if (tmpl->target == PIPE_TEXTURE_1D || - tmpl->target == PIPE_TEXTURE_1D_ARRAY) - should_tile = false; - - /* Scanout BOs for simulator need to be linear for interaction with - * i965. - */ - if (using_vc5_simulator && - tmpl->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) - should_tile = false; - - /* No user-specified modifier; determine our own. 
*/ - if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) { - linear_ok = true; - rsc->tiled = should_tile; - } else if (should_tile && - find_modifier(DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, - modifiers, count)) { - rsc->tiled = true; - } else if (linear_ok) { - rsc->tiled = false; - } else { - fprintf(stderr, "Unsupported modifier requested\n"); - return NULL; - } - - rsc->internal_format = prsc->format; - - vc5_setup_slices(rsc); - if (!vc5_resource_bo_alloc(rsc)) - goto fail; - - return prsc; -fail: - vc5_resource_destroy(pscreen, prsc); - return NULL; -} - -struct pipe_resource * -vc5_resource_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl) -{ - const uint64_t mod = DRM_FORMAT_MOD_INVALID; - return vc5_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); -} - -static struct pipe_resource * -vc5_resource_from_handle(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl, - struct winsys_handle *whandle, - unsigned usage) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - struct vc5_resource *rsc = vc5_resource_setup(pscreen, tmpl); - struct pipe_resource *prsc = &rsc->base; - struct vc5_resource_slice *slice = &rsc->slices[0]; - - if (!rsc) - return NULL; - - switch (whandle->modifier) { - case DRM_FORMAT_MOD_LINEAR: - case DRM_FORMAT_MOD_INVALID: - rsc->tiled = false; - break; - /* XXX: UIF */ - default: - fprintf(stderr, - "Attempt to import unsupported modifier 0x%llx\n", - (long long)whandle->modifier); - goto fail; - } - - if (whandle->offset != 0) { - fprintf(stderr, - "Attempt to import unsupported winsys offset %u\n", - whandle->offset); - goto fail; - } - - switch (whandle->type) { - case DRM_API_HANDLE_TYPE_SHARED: - rsc->bo = vc5_bo_open_name(screen, - whandle->handle, whandle->stride); - break; - case DRM_API_HANDLE_TYPE_FD: - rsc->bo = vc5_bo_open_dmabuf(screen, - whandle->handle, whandle->stride); - break; - default: - fprintf(stderr, - "Attempt to import unsupported handle type %d\n", - whandle->type); - 
goto fail; - } - - if (!rsc->bo) - goto fail; - - rsc->internal_format = prsc->format; - - vc5_setup_slices(rsc); - vc5_debug_resource_layout(rsc, "import"); - - if (whandle->stride != slice->stride) { - static bool warned = false; - if (!warned) { - warned = true; - fprintf(stderr, - "Attempting to import %dx%d %s with " - "unsupported stride %d instead of %d\n", - prsc->width0, prsc->height0, - util_format_short_name(prsc->format), - whandle->stride, - slice->stride); - } - goto fail; - } - - return prsc; - -fail: - vc5_resource_destroy(pscreen, prsc); - return NULL; -} - -static struct pipe_surface * -vc5_create_surface(struct pipe_context *pctx, - struct pipe_resource *ptex, - const struct pipe_surface *surf_tmpl) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_screen *screen = vc5->screen; - struct vc5_surface *surface = CALLOC_STRUCT(vc5_surface); - struct vc5_resource *rsc = vc5_resource(ptex); - - if (!surface) - return NULL; - - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - - struct pipe_surface *psurf = &surface->base; - unsigned level = surf_tmpl->u.tex.level; - struct vc5_resource_slice *slice = &rsc->slices[level]; - - pipe_reference_init(&psurf->reference, 1); - pipe_resource_reference(&psurf->texture, ptex); - - psurf->context = pctx; - psurf->format = surf_tmpl->format; - psurf->width = u_minify(ptex->width0, level); - psurf->height = u_minify(ptex->height0, level); - psurf->u.tex.level = level; - psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; - psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - - surface->offset = vc5_layer_offset(ptex, level, - psurf->u.tex.first_layer); - surface->tiling = slice->tiling; - - surface->format = vc5_get_rt_format(&screen->devinfo, psurf->format); - - if (util_format_is_depth_or_stencil(psurf->format)) { - switch (psurf->format) { - case PIPE_FORMAT_Z16_UNORM: - surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_16; - break; - case PIPE_FORMAT_Z32_FLOAT: - case 
PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_32F; - break; - default: - surface->internal_type = V3D_INTERNAL_TYPE_DEPTH_24; - } - } else { - uint32_t bpp, type; - vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, - surface->format, - &type, &bpp); - surface->internal_type = type; - surface->internal_bpp = bpp; - } - - if (surface->tiling == VC5_TILING_UIF_NO_XOR || - surface->tiling == VC5_TILING_UIF_XOR) { - surface->padded_height_of_output_image_in_uif_blocks = - (slice->padded_height / - (2 * vc5_utile_height(rsc->cpp))); - } - - if (rsc->separate_stencil) { - surface->separate_stencil = - vc5_create_surface(pctx, &rsc->separate_stencil->base, - surf_tmpl); - } - - return &surface->base; -} - -static void -vc5_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf) -{ - struct vc5_surface *surf = vc5_surface(psurf); - - if (surf->separate_stencil) - pipe_surface_reference(&surf->separate_stencil, NULL); - - pipe_resource_reference(&psurf->texture, NULL); - FREE(psurf); -} - -static void -vc5_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) -{ - /* All calls to flush_resource are followed by a flush of the context, - * so there's nothing to do. 
- */ -} - -static enum pipe_format -vc5_resource_get_internal_format(struct pipe_resource *prsc) -{ - return vc5_resource(prsc)->internal_format; -} - -static void -vc5_resource_set_stencil(struct pipe_resource *prsc, - struct pipe_resource *stencil) -{ - vc5_resource(prsc)->separate_stencil = vc5_resource(stencil); -} - -static struct pipe_resource * -vc5_resource_get_stencil(struct pipe_resource *prsc) -{ - struct vc5_resource *rsc = vc5_resource(prsc); - - return &rsc->separate_stencil->base; -} - -static const struct u_transfer_vtbl transfer_vtbl = { - .resource_create = vc5_resource_create, - .resource_destroy = vc5_resource_destroy, - .transfer_map = vc5_resource_transfer_map, - .transfer_unmap = vc5_resource_transfer_unmap, - .transfer_flush_region = u_default_transfer_flush_region, - .get_internal_format = vc5_resource_get_internal_format, - .set_stencil = vc5_resource_set_stencil, - .get_stencil = vc5_resource_get_stencil, -}; - -void -vc5_resource_screen_init(struct pipe_screen *pscreen) -{ - pscreen->resource_create_with_modifiers = - vc5_resource_create_with_modifiers; - pscreen->resource_create = u_transfer_helper_resource_create; - pscreen->resource_from_handle = vc5_resource_from_handle; - pscreen->resource_get_handle = vc5_resource_get_handle; - pscreen->resource_destroy = u_transfer_helper_resource_destroy; - pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, - true, true, true); -} - -void -vc5_resource_context_init(struct pipe_context *pctx) -{ - pctx->transfer_map = u_transfer_helper_transfer_map; - pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; - pctx->transfer_unmap = u_transfer_helper_transfer_unmap; - pctx->buffer_subdata = u_default_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; - pctx->create_surface = vc5_create_surface; - pctx->surface_destroy = vc5_surface_destroy; - pctx->resource_copy_region = util_resource_copy_region; - pctx->blit = vc5_blit; - pctx->flush_resource = 
vc5_flush_resource; -} diff --git a/src/gallium/drivers/vc5/vc5_resource.h b/src/gallium/drivers/vc5/vc5_resource.h deleted file mode 100644 index 2af355354ab..00000000000 --- a/src/gallium/drivers/vc5/vc5_resource.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef VC5_RESOURCE_H -#define VC5_RESOURCE_H - -#include "vc5_screen.h" -#include "util/u_transfer.h" - -/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These - * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB - * page. Those pages are then arranged left-to-right, top-to-bottom, to cover - * an image. - * - * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte - * utiles. Utiles may be 8x8 (8bpp), 8x4(16bpp) or 4x4 (32bpp). 
- */ - -/** - * Tiling mode enum used for vc5_resource.c, which maps directly to the Memory - * Format field of render target and Z/Stencil config. - */ -enum vc5_tiling_mode { - /* Untiled resources. Not valid as texture inputs. */ - VC5_TILING_RASTER, - - /* Single line of u-tiles. */ - VC5_TILING_LINEARTILE, - - /* Departure from standard 4-UIF block column format. */ - VC5_TILING_UBLINEAR_1_COLUMN, - - /* Departure from standard 4-UIF block column format. */ - VC5_TILING_UBLINEAR_2_COLUMN, - - /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is - * split 2x2 into utiles. - */ - VC5_TILING_UIF_NO_XOR, - - /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is - * split 2x2 into utiles. - */ - VC5_TILING_UIF_XOR, -}; - -struct vc5_transfer { - struct pipe_transfer base; - void *map; -}; - -struct vc5_resource_slice { - uint32_t offset; - uint32_t stride; - uint32_t padded_height; - /* Size of a single pane of the slice. For 3D textures, there will be - * a number of panes equal to the minified, power-of-two-aligned - * depth. - */ - uint32_t size; - uint8_t ub_pad; - enum vc5_tiling_mode tiling; -}; - -struct vc5_surface { - struct pipe_surface base; - uint32_t offset; - enum vc5_tiling_mode tiling; - /** - * Output image format for TILE_RENDERING_MODE_CONFIGURATION - */ - uint8_t format; - - /** - * Internal format of the tile buffer for - * TILE_RENDERING_MODE_CONFIGURATION. - */ - uint8_t internal_type; - - /** - * internal bpp value (0=32bpp, 2=128bpp) for color buffers in - * TILE_RENDERING_MODE_CONFIGURATION. - */ - uint8_t internal_bpp; - - uint32_t padded_height_of_output_image_in_uif_blocks; - - /* If the resource being referenced is separate stencil, then this is - * the surface to use when reading/writing stencil. 
- */ - struct pipe_surface *separate_stencil; -}; - -struct vc5_resource { - struct pipe_resource base; - struct vc5_bo *bo; - struct vc5_resource_slice slices[VC5_MAX_MIP_LEVELS]; - uint32_t cube_map_stride; - uint32_t size; - int cpp; - bool tiled; - - /** - * Number of times the resource has been written to. - * - * This is used to track whether we need to load the surface on first - * rendering. - */ - uint64_t writes; - - /** - * Bitmask of PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH, PIPE_CLEAR_STENCIL - * for which parts of the resource are defined. - * - * Used for avoiding fallback to quad clears for clearing just depth, - * when the stencil contents have never been initialized. Note that - * we're lazy and fields not present in the buffer (DEPTH in a color - * buffer) may get marked. - */ - uint32_t initialized_buffers; - - enum pipe_format internal_format; - - /* Resource storing the S8 part of a Z32F_S8 resource, or NULL. */ - struct vc5_resource *separate_stencil; -}; - -static inline struct vc5_resource * -vc5_resource(struct pipe_resource *prsc) -{ - return (struct vc5_resource *)prsc; -} - -static inline struct vc5_surface * -vc5_surface(struct pipe_surface *psurf) -{ - return (struct vc5_surface *)psurf; -} - -static inline struct vc5_transfer * -vc5_transfer(struct pipe_transfer *ptrans) -{ - return (struct vc5_transfer *)ptrans; -} - -void vc5_resource_screen_init(struct pipe_screen *pscreen); -void vc5_resource_context_init(struct pipe_context *pctx); -struct pipe_resource *vc5_resource_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl); -uint32_t vc5_layer_offset(struct pipe_resource *prsc, uint32_t level, - uint32_t layer); - - -#endif /* VC5_RESOURCE_H */ diff --git a/src/gallium/drivers/vc5/vc5_screen.c b/src/gallium/drivers/vc5/vc5_screen.c deleted file mode 100644 index 9b367857322..00000000000 --- a/src/gallium/drivers/vc5/vc5_screen.c +++ /dev/null @@ -1,648 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * Copyright (C) 
2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "os/os_misc.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" -#include "pipe/p_state.h" - -#include "util/u_debug.h" -#include "util/u_memory.h" -#include "util/u_format.h" -#include "util/u_hash_table.h" -#include "util/ralloc.h" - -#include -#include "vc5_screen.h" -#include "vc5_context.h" -#include "vc5_resource.h" -#include "compiler/v3d_compiler.h" - -static const char * -vc5_screen_get_name(struct pipe_screen *pscreen) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - - if (!screen->name) { - screen->name = ralloc_asprintf(screen, - "VC5 V3D %d.%d", - screen->devinfo.ver / 10, - screen->devinfo.ver % 10); - } - - return screen->name; -} - -static const char * -vc5_screen_get_vendor(struct pipe_screen *pscreen) -{ - return "Broadcom"; -} - -static void -vc5_screen_destroy(struct pipe_screen *pscreen) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - - util_hash_table_destroy(screen->bo_handles); - vc5_bufmgr_destroy(pscreen); - slab_destroy_parent(&screen->transfer_pool); - - if (using_vc5_simulator) - vc5_simulator_destroy(screen); - - v3d_compiler_free(screen->compiler); - - close(screen->fd); - ralloc_free(pscreen); -} - -static int -vc5_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - - switch (param) { - /* Supported features (boolean caps). 
*/ - case PIPE_CAP_VERTEX_COLOR_CLAMPED: - case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: - case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: - case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_SHAREABLE_SHADERS: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_TEXTURE_MULTISAMPLE: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_SM3: - case PIPE_CAP_TEXTURE_QUERY_LOD: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: - case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: - case PIPE_CAP_COMPUTE: - case PIPE_CAP_DRAW_INDIRECT: - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: - return 1; - - case PIPE_CAP_INDEP_BLEND_ENABLE: - return screen->devinfo.ver >= 40; - - case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: - return 256; - - case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: - return 4; - - case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 400; - - case PIPE_CAP_MAX_VIEWPORTS: - return 1; - - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - return 0; - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - if (screen->devinfo.ver >= 40) - return 0; - else - return 1; - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - if (screen->devinfo.ver >= 40) - return 1; - else - return 0; - - case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: - case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: - return 1; - - - /* Stream output. 
*/ - case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return 4; - case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return 64; - - case PIPE_CAP_MIN_TEXEL_OFFSET: - case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: - return -8; - case PIPE_CAP_MAX_TEXEL_OFFSET: - case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: - return 7; - - /* Unsupported features. */ - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: - case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: - case PIPE_CAP_CUBE_MAP_ARRAY: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TGSI_TEXCOORD: - case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - case PIPE_CAP_CONDITIONAL_RENDER: - case PIPE_CAP_TEXTURE_BARRIER: - case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: - case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: - case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - case PIPE_CAP_TEXTURE_GATHER_SM5: - case PIPE_CAP_FAKE_SW_MSAA: - case PIPE_CAP_SAMPLE_SHADING: - case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: - case PIPE_CAP_MAX_VERTEX_STREAMS: - case PIPE_CAP_MULTI_DRAW_INDIRECT: - case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: - case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: - case PIPE_CAP_SAMPLER_VIEW_TARGET: - case PIPE_CAP_CLIP_HALFZ: - case PIPE_CAP_VERTEXID_NOBASE: - case PIPE_CAP_POLYGON_OFFSET_CLAMP: - case 
PIPE_CAP_MULTISAMPLE_Z_RESOLVE: - case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: - case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: - case PIPE_CAP_TEXTURE_FLOAT_LINEAR: - case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: - case PIPE_CAP_DEPTH_BOUNDS_TEST: - case PIPE_CAP_TGSI_TXQS: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: - case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: - case PIPE_CAP_CLEAR_TEXTURE: - case PIPE_CAP_DRAW_PARAMETERS: - case PIPE_CAP_TGSI_PACK_HALF_FLOAT: - case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: - case PIPE_CAP_INVALIDATE_BUFFER: - case PIPE_CAP_GENERATE_MIPMAP: - case PIPE_CAP_STRING_MARKER: - case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: - case PIPE_CAP_QUERY_BUFFER_OBJECT: - case PIPE_CAP_QUERY_MEMORY_INFO: - case PIPE_CAP_PCI_GROUP: - case PIPE_CAP_PCI_BUS: - case PIPE_CAP_PCI_DEVICE: - case PIPE_CAP_PCI_FUNCTION: - case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: - case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: - case PIPE_CAP_CULL_DISTANCE: - case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: - case PIPE_CAP_TGSI_VOTE: - case PIPE_CAP_MAX_WINDOW_RECTANGLES: - case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: - case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: - case PIPE_CAP_TGSI_ARRAY_COMPONENTS: - case PIPE_CAP_TGSI_FS_FBFETCH: - case PIPE_CAP_INT64: - case PIPE_CAP_INT64_DIVMOD: - case PIPE_CAP_DOUBLES: - case PIPE_CAP_BINDLESS_TEXTURE: - case PIPE_CAP_POST_DEPTH_COVERAGE: - case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: - case PIPE_CAP_TGSI_BALLOT: - case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: - case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: - case PIPE_CAP_TGSI_CLOCK: - case PIPE_CAP_TGSI_TEX_TXF_LZ: - case PIPE_CAP_NATIVE_FENCE_FD: - case PIPE_CAP_FENCE_SIGNAL: - case PIPE_CAP_TGSI_MUL_ZERO_WINS: - case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_QUERY_SO_OVERFLOW: - case PIPE_CAP_MEMOBJ: - case PIPE_CAP_LOAD_CONSTBUF: - case PIPE_CAP_TILE_RASTER_ORDER: - case 
PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: - case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: - case PIPE_CAP_CONTEXT_PRIORITY_MASK: - case PIPE_CAP_CONSTBUF0_FLAGS: - case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: - case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: - case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: - case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: - case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: - case PIPE_CAP_PACKED_UNIFORMS: - return 0; - - /* Geometry shader output, unsupported. */ - case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: - case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: - return 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return VC5_MAX_MIP_LEVELS; - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return 2048; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - - /* Queries. 
*/ - case PIPE_CAP_QUERY_TIME_ELAPSED: - case PIPE_CAP_QUERY_TIMESTAMP: - return 0; - - case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: - return 2048; - - case PIPE_CAP_ENDIANNESS: - return PIPE_ENDIAN_LITTLE; - - case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: - return 64; - - case PIPE_CAP_VENDOR_ID: - return 0x14E4; - case PIPE_CAP_DEVICE_ID: - return 0xFFFFFFFF; - case PIPE_CAP_ACCELERATED: - return 1; - case PIPE_CAP_VIDEO_MEMORY: { - uint64_t system_memory; - - if (!os_get_total_physical_memory(&system_memory)) - return 0; - - return (int)(system_memory >> 20); - } - case PIPE_CAP_UMA: - return 1; - - default: - fprintf(stderr, "unknown param %d\n", param); - return 0; - } -} - -static float -vc5_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - return 32; - - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 512.0f; - - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 0.0f; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - - case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: - case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: - case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: - return 0.0f; - default: - fprintf(stderr, "unknown paramf %d\n", param); - return 0; - } -} - -static int -vc5_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, - enum pipe_shader_cap param) -{ - if (shader != PIPE_SHADER_VERTEX && - shader != PIPE_SHADER_FRAGMENT) { - return 0; - } - - /* this is probably not totally correct.. 
but it's a start: */ - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return UINT_MAX; - - case PIPE_SHADER_CAP_MAX_INPUTS: - if (shader == PIPE_SHADER_FRAGMENT) - return VC5_MAX_FS_INPUTS / 4; - else - return 16; - case PIPE_SHADER_CAP_MAX_OUTPUTS: - if (shader == PIPE_SHADER_FRAGMENT) - return 4; - else - return VC5_MAX_FS_INPUTS / 4; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ - case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return 16 * 1024 * sizeof(float); - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return 16; - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - return 0; - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - case PIPE_SHADER_CAP_SUBROUTINES: - return 0; - case PIPE_SHADER_CAP_INTEGERS: - return 1; - case PIPE_SHADER_CAP_FP16: - case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: - case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: - case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: - case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: - return 0; - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: - return VC5_MAX_TEXTURE_SAMPLERS; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; - case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 0; - case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: - return 32; - case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: - case 
PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: - return 0; - default: - fprintf(stderr, "unknown shader param %d\n", param); - return 0; - } - return 0; -} - -static boolean -vc5_screen_is_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned usage) -{ - struct vc5_screen *screen = vc5_screen(pscreen); - - if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) - return FALSE; - - if ((target >= PIPE_MAX_TEXTURE_TYPES) || - !util_format_is_supported(format, usage)) { - return FALSE; - } - - if (usage & PIPE_BIND_VERTEX_BUFFER) { - switch (format) { - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16_USCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8G8_USCALED: - case 
PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R10G10B10A2_UNORM: - case PIPE_FORMAT_B10G10R10A2_UNORM: - case PIPE_FORMAT_R10G10B10A2_SNORM: - case PIPE_FORMAT_B10G10R10A2_SNORM: - case PIPE_FORMAT_R10G10B10A2_USCALED: - case PIPE_FORMAT_B10G10R10A2_USCALED: - case PIPE_FORMAT_R10G10B10A2_SSCALED: - case PIPE_FORMAT_B10G10R10A2_SSCALED: - break; - default: - return FALSE; - } - } - - if ((usage & PIPE_BIND_RENDER_TARGET) && - !vc5_rt_format_supported(&screen->devinfo, format)) { - return FALSE; - } - - if ((usage & PIPE_BIND_SAMPLER_VIEW) && - !vc5_tex_format_supported(&screen->devinfo, format)) { - return FALSE; - } - - if ((usage & PIPE_BIND_DEPTH_STENCIL) && - !(format == PIPE_FORMAT_S8_UINT_Z24_UNORM || - format == PIPE_FORMAT_X8Z24_UNORM || - format == PIPE_FORMAT_Z16_UNORM || - format == PIPE_FORMAT_Z32_FLOAT || - format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { - return FALSE; - } - - if ((usage & PIPE_BIND_INDEX_BUFFER) && - !(format == PIPE_FORMAT_I8_UINT || - format == PIPE_FORMAT_I16_UINT || - format == PIPE_FORMAT_I32_UINT)) { - return FALSE; - } - - return TRUE; -} - -#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) - -static unsigned handle_hash(void *key) -{ - return PTR_TO_UINT(key); -} - -static int handle_compare(void *key1, void *key2) -{ - return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); -} - -static bool -vc5_get_device_info(struct vc5_screen *screen) -{ - struct drm_v3d_get_param ident0 = { - .param = DRM_V3D_PARAM_V3D_CORE0_IDENT0, - }; - struct drm_v3d_get_param ident1 = { - .param = DRM_V3D_PARAM_V3D_CORE0_IDENT1, - }; - int ret; - - ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident0); - if (ret != 0) { - fprintf(stderr, "Couldn't get V3D core IDENT0: %s\n", - strerror(errno)); - return false; - } - ret = vc5_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &ident1); - if (ret != 0) { - fprintf(stderr, 
"Couldn't get V3D core IDENT1: %s\n", - strerror(errno)); - return false; - } - - uint32_t major = (ident0.value >> 24) & 0xff; - uint32_t minor = (ident1.value >> 0) & 0xf; - screen->devinfo.ver = major * 10 + minor; - - switch (screen->devinfo.ver) { - case 33: - case 41: - case 42: - break; - default: - fprintf(stderr, - "V3D %d.%d not supported by this version of Mesa.\n", - screen->devinfo.ver / 10, - screen->devinfo.ver % 10); - return false; - } - - return true; -} - -static const void * -vc5_screen_get_compiler_options(struct pipe_screen *pscreen, - enum pipe_shader_ir ir, unsigned shader) -{ - return &v3d_nir_options; -} - -struct pipe_screen * -v3d_screen_create(int fd) -{ - struct vc5_screen *screen = rzalloc(NULL, struct vc5_screen); - struct pipe_screen *pscreen; - - pscreen = &screen->base; - - pscreen->destroy = vc5_screen_destroy; - pscreen->get_param = vc5_screen_get_param; - pscreen->get_paramf = vc5_screen_get_paramf; - pscreen->get_shader_param = vc5_screen_get_shader_param; - pscreen->context_create = vc5_context_create; - pscreen->is_format_supported = vc5_screen_is_format_supported; - - screen->fd = fd; - list_inithead(&screen->bo_cache.time_list); - (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); - screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); - -#if defined(USE_V3D_SIMULATOR) - vc5_simulator_init(screen); -#endif - - if (!vc5_get_device_info(screen)) - goto fail; - - slab_create_parent(&screen->transfer_pool, sizeof(struct vc5_transfer), 16); - - vc5_fence_init(screen); - - v3d_process_debug_variable(); - - vc5_resource_screen_init(pscreen); - - screen->compiler = v3d_compiler_init(&screen->devinfo); - - pscreen->get_name = vc5_screen_get_name; - pscreen->get_vendor = vc5_screen_get_vendor; - pscreen->get_device_vendor = vc5_screen_get_vendor; - pscreen->get_compiler_options = vc5_screen_get_compiler_options; - - return pscreen; - -fail: - close(fd); - ralloc_free(pscreen); - return NULL; -} diff --git 
a/src/gallium/drivers/vc5/vc5_screen.h b/src/gallium/drivers/vc5/vc5_screen.h deleted file mode 100644 index 975bfe01a75..00000000000 --- a/src/gallium/drivers/vc5/vc5_screen.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef VC5_SCREEN_H -#define VC5_SCREEN_H - -#include "pipe/p_screen.h" -#include "os/os_thread.h" -#include "state_tracker/drm_driver.h" -#include "util/list.h" -#include "util/slab.h" -#include "broadcom/common/v3d_debug.h" -#include "broadcom/common/v3d_device_info.h" - -struct vc5_bo; - -#define VC5_MAX_MIP_LEVELS 12 -#define VC5_MAX_TEXTURE_SAMPLERS 32 -#define VC5_MAX_SAMPLES 4 -#define VC5_MAX_DRAW_BUFFERS 4 -#define VC5_MAX_ATTRIBUTES 16 - -/* These are tunable parameters in the HW design, but all the V3D - * implementations agree. 
- */ -#define VC5_UIFCFG_BANKS 8 -#define VC5_UIFCFG_PAGE_SIZE 4096 -#define VC5_UIFCFG_XOR_VALUE (1 << 4) -#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS) -#define VC5_UBLOCK_SIZE 64 -#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE) -#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE) - -struct vc5_simulator_file; - -struct vc5_screen { - struct pipe_screen base; - int fd; - - struct v3d_device_info devinfo; - - const char *name; - - struct slab_parent_pool transfer_pool; - - struct vc5_bo_cache { - /** List of struct vc5_bo freed, by age. */ - struct list_head time_list; - /** List of struct vc5_bo freed, per size, by age. */ - struct list_head *size_list; - uint32_t size_list_size; - - mtx_t lock; - - uint32_t bo_size; - uint32_t bo_count; - } bo_cache; - - const struct v3d_compiler *compiler; - - struct util_hash_table *bo_handles; - mtx_t bo_handles_mutex; - - uint32_t bo_size; - uint32_t bo_count; - - struct vc5_simulator_file *sim_file; -}; - -static inline struct vc5_screen * -vc5_screen(struct pipe_screen *screen) -{ - return (struct vc5_screen *)screen; -} - -struct pipe_screen *v3d_screen_create(int fd); - -void -vc5_fence_init(struct vc5_screen *screen); - -#endif /* VC5_SCREEN_H */ diff --git a/src/gallium/drivers/vc5/vc5_simulator.c b/src/gallium/drivers/vc5/vc5_simulator.c deleted file mode 100644 index 4a1650074c4..00000000000 --- a/src/gallium/drivers/vc5/vc5_simulator.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright 
notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** - * @file vc5_simulator.c - * - * Implements VC5 simulation on top of a non-VC5 GEM fd. - * - * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on - * top of the simpenrose software simulator. Generally, VC5 driver BOs have a - * GEM-side copy of their contents and a simulator-side memory area that the - * GEM contents get copied into during simulation. Once simulation is done, - * the simulator's data is copied back out to the GEM BOs, so that rendering - * appears on the screen as if actual hardware rendering had been done. - * - * One of the limitations of this code is that we shouldn't really need a - * GEM-side BO for non-window-system BOs. However, do we need unique BO - * handles for each of our GEM bos so that this file can look up its state - * from the handle passed in at submit ioctl time (also, a couple of places - * outside of this file still call ioctls directly on the fd). - * - * Another limitation is that BO import doesn't work unless the underlying - * window system's BO size matches what VC5 is going to use, which of course - * doesn't work out in practice. This means that for now, only DRI3 (VC5 - * makes the winsys BOs) is supported, not DRI2 (window system makes the winys - * BOs). 
- */ - -#ifdef USE_V3D_SIMULATOR - -#include <sys/mman.h> -#include "util/hash_table.h" -#include "util/ralloc.h" -#include "util/set.h" -#include "util/u_memory.h" -#include "util/u_mm.h" -#include "vc5_simulator_wrapper.h" - -#include "vc5_screen.h" -#include "vc5_context.h" - -/** Global (across GEM fds) state for the simulator */ -static struct vc5_simulator_state { - mtx_t mutex; - - struct v3d_hw *v3d; - int ver; - - /* Base virtual address of the heap. */ - void *mem; - /* Base hardware address of the heap. */ - uint32_t mem_base; - /* Size of the heap. */ - size_t mem_size; - - struct mem_block *heap; - struct mem_block *overflow; - - /** Mapping from GEM handle to struct vc5_simulator_bo * */ - struct hash_table *fd_map; - - int refcount; -} sim_state = { - .mutex = _MTX_INITIALIZER_NP, -}; - -/** Per-GEM-fd state for the simulator. */ -struct vc5_simulator_file { - int fd; - - /** Mapping from GEM handle to struct vc5_simulator_bo * */ - struct hash_table *bo_map; - - struct mem_block *gmp; - void *gmp_vaddr; -}; - -/** Wrapper for drm_vc5_bo tracking the simulator-specific state. */ -struct vc5_simulator_bo { - struct vc5_simulator_file *file; - - /** Area for this BO within sim_state->mem */ - struct mem_block *block; - uint32_t size; - void *vaddr; - - void *winsys_map; - uint32_t winsys_stride; - - int handle; -}; - -static void * -int_to_key(int key) -{ - return (void *)(uintptr_t)key; -} - -static struct vc5_simulator_file * -vc5_get_simulator_file_for_fd(int fd) -{ - struct hash_entry *entry = _mesa_hash_table_search(sim_state.fd_map, - int_to_key(fd + 1)); - return entry ? entry->data : NULL; -} - -/* A marker placed just after each BO, then checked after rendering to make - * sure it's still there. - */ -#define BO_SENTINEL 0xfedcba98 - -/* 128kb */ -#define GMP_ALIGN2 17 - -/** - * Sets the range of GPU virtual address space to have the given GMP - * permissions (bit 0 = read, bit 1 = write, write-only forbidden). 
- */ -static void -set_gmp_flags(struct vc5_simulator_file *file, - uint32_t offset, uint32_t size, uint32_t flag) -{ - assert((offset & ((1 << GMP_ALIGN2) - 1)) == 0); - int gmp_offset = offset >> GMP_ALIGN2; - int gmp_count = align(size, 1 << GMP_ALIGN2) >> GMP_ALIGN2; - uint32_t *gmp = file->gmp_vaddr; - - assert(flag <= 0x3); - - for (int i = gmp_offset; i < gmp_offset + gmp_count; i++) { - int32_t bitshift = (i % 16) * 2; - gmp[i / 16] &= ~(0x3 << bitshift); - gmp[i / 16] |= flag << bitshift; - } -} - -/** - * Allocates space in simulator memory and returns a tracking struct for it - * that also contains the drm_gem_cma_object struct. - */ -static struct vc5_simulator_bo * -vc5_create_simulator_bo(int fd, int handle, unsigned size) -{ - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct vc5_simulator_bo *sim_bo = rzalloc(file, - struct vc5_simulator_bo); - size = align(size, 4096); - - sim_bo->file = file; - sim_bo->handle = handle; - - mtx_lock(&sim_state.mutex); - sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, GMP_ALIGN2, 0); - mtx_unlock(&sim_state.mutex); - assert(sim_bo->block); - - set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); - - sim_bo->size = size; - sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; - memset(sim_bo->vaddr, 0xd0, size); - - *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; - - /* A handle of 0 is used for vc5_gem.c internal allocations that - * don't need to go in the lookup table. 
- */ - if (handle != 0) { - mtx_lock(&sim_state.mutex); - _mesa_hash_table_insert(file->bo_map, int_to_key(handle), - sim_bo); - mtx_unlock(&sim_state.mutex); - } - - return sim_bo; -} - -static void -vc5_free_simulator_bo(struct vc5_simulator_bo *sim_bo) -{ - struct vc5_simulator_file *sim_file = sim_bo->file; - - if (sim_bo->winsys_map) - munmap(sim_bo->winsys_map, sim_bo->size); - - set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); - - mtx_lock(&sim_state.mutex); - u_mmFreeMem(sim_bo->block); - if (sim_bo->handle) { - struct hash_entry *entry = - _mesa_hash_table_search(sim_file->bo_map, - int_to_key(sim_bo->handle)); - _mesa_hash_table_remove(sim_file->bo_map, entry); - } - mtx_unlock(&sim_state.mutex); - ralloc_free(sim_bo); -} - -static struct vc5_simulator_bo * -vc5_get_simulator_bo(struct vc5_simulator_file *file, int gem_handle) -{ - mtx_lock(&sim_state.mutex); - struct hash_entry *entry = - _mesa_hash_table_search(file->bo_map, int_to_key(gem_handle)); - mtx_unlock(&sim_state.mutex); - - return entry ? 
entry->data : NULL; -} - -static int -vc5_simulator_pin_bos(int fd, struct vc5_job *job) -{ - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct set_entry *entry; - - set_foreach(job->bos, entry) { - struct vc5_bo *bo = (struct vc5_bo *)entry->key; - struct vc5_simulator_bo *sim_bo = - vc5_get_simulator_bo(file, bo->handle); - - vc5_bo_map(bo); - memcpy(sim_bo->vaddr, bo->map, bo->size); - } - - return 0; -} - -static int -vc5_simulator_unpin_bos(int fd, struct vc5_job *job) -{ - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct set_entry *entry; - - set_foreach(job->bos, entry) { - struct vc5_bo *bo = (struct vc5_bo *)entry->key; - struct vc5_simulator_bo *sim_bo = - vc5_get_simulator_bo(file, bo->handle); - - if (*(uint32_t *)(sim_bo->vaddr + - sim_bo->size) != BO_SENTINEL) { - fprintf(stderr, "Buffer overflow in %s\n", bo->name); - } - - vc5_bo_map(bo); - memcpy(bo->map, sim_bo->vaddr, bo->size); - } - - return 0; -} - -#if 0 -static void -vc5_dump_to_file(struct vc5_exec_info *exec) -{ - static int dumpno = 0; - struct drm_vc5_get_hang_state *state; - struct drm_vc5_get_hang_state_bo *bo_state; - unsigned int dump_version = 0; - - if (!(vc5_debug & VC5_DEBUG_DUMP)) - return; - - state = calloc(1, sizeof(*state)); - - int unref_count = 0; - list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, - unref_head) { - unref_count++; - } - - /* Add one more for the overflow area that isn't wrapped in a BO. 
*/ - state->bo_count = exec->bo_count + unref_count + 1; - bo_state = calloc(state->bo_count, sizeof(*bo_state)); - - char *filename = NULL; - asprintf(&filename, "vc5-dri-%d.dump", dumpno++); - FILE *f = fopen(filename, "w+"); - if (!f) { - fprintf(stderr, "Couldn't open %s: %s", filename, - strerror(errno)); - return; - } - - fwrite(&dump_version, sizeof(dump_version), 1, f); - - state->ct0ca = exec->ct0ca; - state->ct0ea = exec->ct0ea; - state->ct1ca = exec->ct1ca; - state->ct1ea = exec->ct1ea; - state->start_bin = exec->ct0ca; - state->start_render = exec->ct1ca; - fwrite(state, sizeof(*state), 1, f); - - int i; - for (i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - bo_state[i].handle = i; /* Not used by the parser. */ - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - } - - list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - bo_state[i].handle = 0; - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - i++; - } - - /* Add the static overflow memory area. 
*/ - bo_state[i].handle = exec->bo_count; - bo_state[i].paddr = sim_state.overflow->ofs; - bo_state[i].size = sim_state.overflow->size; - i++; - - fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); - - for (int i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } - - list_for_each_entry_safe(struct drm_vc5_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } - - void *overflow = calloc(1, sim_state.overflow->size); - fwrite(overflow, 1, sim_state.overflow->size, f); - free(overflow); - - free(state); - free(bo_state); - fclose(f); -} -#endif - -int -vc5_simulator_flush(struct vc5_context *vc5, - struct drm_v3d_submit_cl *submit, struct vc5_job *job) -{ - struct vc5_screen *screen = vc5->screen; - int fd = screen->fd; - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct vc5_surface *csurf = vc5_surface(vc5->framebuffer.cbufs[0]); - struct vc5_resource *ctex = csurf ? vc5_resource(csurf->base.texture) : NULL; - struct vc5_simulator_bo *csim_bo = ctex ? vc5_get_simulator_bo(file, ctex->bo->handle) : NULL; - uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; - uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; - uint32_t row_len = MIN2(sim_stride, winsys_stride); - int ret; - - if (ctex && csim_bo->winsys_map) { -#if 0 - fprintf(stderr, "%dx%d %d %d %d\n", - ctex->base.b.width0, ctex->base.b.height0, - winsys_stride, - sim_stride, - ctex->bo->size); -#endif - - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(ctex->bo->map + y * sim_stride, - csim_bo->winsys_map + y * winsys_stride, - row_len); - } - } - - ret = vc5_simulator_pin_bos(fd, job); - if (ret) - return ret; - - //vc5_dump_to_file(&exec); - - if (sim_state.ver >= 41) - v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); - else - v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); - - ret = vc5_simulator_unpin_bos(fd, job); - if (ret) - return ret; - - if (ctex && csim_bo->winsys_map) { - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(csim_bo->winsys_map + y * winsys_stride, - ctex->bo->map + y * sim_stride, - row_len); - } - } - - return 0; -} - -/** - * Map the underlying GEM object from the real hardware GEM handle. - */ -static void * -vc5_simulator_map_winsys_bo(int fd, struct vc5_simulator_bo *sim_bo) -{ - int ret; - void *map; - - struct drm_mode_map_dumb map_dumb = { - .handle = sim_bo->handle, - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map_dumb.offset); - if (map == MAP_FAILED) { - fprintf(stderr, - "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - sim_bo->handle, (long long)map_dumb.offset, - (int)sim_bo->size); - abort(); - } - - return map; -} - -/** - * Do fixups after a BO has been opened from a handle. - * - * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE - * time, but we're still using drmPrimeFDToHandle() so we have this helper to - * be called afterward instead. 
- */ -void vc5_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size) -{ - struct vc5_simulator_bo *sim_bo = - vc5_create_simulator_bo(fd, handle, size); - - sim_bo->winsys_stride = winsys_stride; - sim_bo->winsys_map = vc5_simulator_map_winsys_bo(fd, sim_bo); -} - -/** - * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. - * - * Making a VC5 BO is just a matter of making a corresponding BO on the host. - */ -static int -vc5_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) -{ - int ret; - struct drm_mode_create_dumb create = { - .width = 128, - .bpp = 8, - .height = (args->size + 127) / 128, - }; - - ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); - assert(create.size >= args->size); - - args->handle = create.handle; - - struct vc5_simulator_bo *sim_bo = - vc5_create_simulator_bo(fd, create.handle, args->size); - - args->offset = sim_bo->block->ofs; - - return ret; -} - -/** - * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. - * - * We just pass this straight through to dumb mmap. - */ -static int -vc5_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args) -{ - int ret; - struct drm_mode_map_dumb map = { - .handle = args->handle, - }; - - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); - args->offset = map.offset; - - return ret; -} - -static int -vc5_simulator_get_bo_offset_ioctl(int fd, struct drm_v3d_get_bo_offset *args) -{ - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, - args->handle); - - args->offset = sim_bo->block->ofs; - - return 0; -} - -static int -vc5_simulator_gem_close_ioctl(int fd, struct drm_gem_close *args) -{ - /* Free the simulator's internal tracking. */ - struct vc5_simulator_file *file = vc5_get_simulator_file_for_fd(fd); - struct vc5_simulator_bo *sim_bo = vc5_get_simulator_bo(file, - args->handle); - - vc5_free_simulator_bo(sim_bo); - - /* Pass the call on down. 
*/ - return drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, args); -} - -static int -vc5_simulator_get_param_ioctl(int fd, struct drm_v3d_get_param *args) -{ - if (sim_state.ver >= 41) - return v3d41_simulator_get_param_ioctl(sim_state.v3d, args); - else - return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); -} - -int -vc5_simulator_ioctl(int fd, unsigned long request, void *args) -{ - switch (request) { - case DRM_IOCTL_V3D_CREATE_BO: - return vc5_simulator_create_bo_ioctl(fd, args); - case DRM_IOCTL_V3D_MMAP_BO: - return vc5_simulator_mmap_bo_ioctl(fd, args); - case DRM_IOCTL_V3D_GET_BO_OFFSET: - return vc5_simulator_get_bo_offset_ioctl(fd, args); - - case DRM_IOCTL_V3D_WAIT_BO: - /* We do all of the vc5 rendering synchronously, so we just - * return immediately on the wait ioctls. This ignores any - * native rendering to the host BO, so it does mean we race on - * front buffer rendering. - */ - return 0; - - case DRM_IOCTL_V3D_GET_PARAM: - return vc5_simulator_get_param_ioctl(fd, args); - - case DRM_IOCTL_GEM_CLOSE: - return vc5_simulator_gem_close_ioctl(fd, args); - - case DRM_IOCTL_GEM_OPEN: - case DRM_IOCTL_GEM_FLINK: - return drmIoctl(fd, request, args); - default: - fprintf(stderr, "Unknown ioctl 0x%08x\n", (int)request); - abort(); - } -} - -static void -vc5_simulator_init_global(const struct v3d_device_info *devinfo) -{ - mtx_lock(&sim_state.mutex); - if (sim_state.refcount++) { - mtx_unlock(&sim_state.mutex); - return; - } - - sim_state.v3d = v3d_hw_auto_new(NULL); - v3d_hw_alloc_mem(sim_state.v3d, 1024 * 1024 * 1024); - sim_state.mem_base = - v3d_hw_get_mem(sim_state.v3d, &sim_state.mem_size, - &sim_state.mem); - - /* Allocate from anywhere from 4096 up. We don't allocate at 0, - * because for OQs and some other addresses in the HW, 0 means - * disabled. - */ - sim_state.heap = u_mmInit(4096, sim_state.mem_size - 4096); - - /* Make a block of 0xd0 at address 0 to make sure we don't screw up - * and land there. 
- */ - struct mem_block *b = u_mmAllocMem(sim_state.heap, 4096, GMP_ALIGN2, 0); - memset(sim_state.mem + b->ofs - sim_state.mem_base, 0xd0, 4096); - - sim_state.ver = v3d_hw_get_version(sim_state.v3d); - - mtx_unlock(&sim_state.mutex); - - sim_state.fd_map = - _mesa_hash_table_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - - if (sim_state.ver >= 41) - v3d41_simulator_init_regs(sim_state.v3d); - else - v3d33_simulator_init_regs(sim_state.v3d); -} - -void -vc5_simulator_init(struct vc5_screen *screen) -{ - vc5_simulator_init_global(&screen->devinfo); - - screen->sim_file = rzalloc(screen, struct vc5_simulator_file); - struct vc5_simulator_file *sim_file = screen->sim_file; - - screen->sim_file->bo_map = - _mesa_hash_table_create(screen->sim_file, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - - mtx_lock(&sim_state.mutex); - _mesa_hash_table_insert(sim_state.fd_map, int_to_key(screen->fd + 1), - screen->sim_file); - mtx_unlock(&sim_state.mutex); - - sim_file->gmp = u_mmAllocMem(sim_state.heap, 8096, GMP_ALIGN2, 0); - sim_file->gmp_vaddr = (sim_state.mem + sim_file->gmp->ofs - - sim_state.mem_base); -} - -void -vc5_simulator_destroy(struct vc5_screen *screen) -{ - mtx_lock(&sim_state.mutex); - if (!--sim_state.refcount) { - _mesa_hash_table_destroy(sim_state.fd_map, NULL); - u_mmDestroy(sim_state.heap); - /* No memsetting the struct, because it contains the mutex. 
*/ - sim_state.mem = NULL; - } - mtx_unlock(&sim_state.mutex); -} - -#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/vc5/vc5_simulator_wrapper.cpp b/src/gallium/drivers/vc5/vc5_simulator_wrapper.cpp deleted file mode 100644 index bee3ce70bfd..00000000000 --- a/src/gallium/drivers/vc5/vc5_simulator_wrapper.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright © 2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file - * - * Wraps bits of the V3D simulator interface in a C interface for the - * vc5_simulator.c code to use. 
- */ - -#ifdef USE_V3D_SIMULATOR - -#include "vc5_simulator_wrapper.h" - -#define V3D_TECH_VERSION 3 -#define V3D_REVISION 3 -#define V3D_SUB_REV 0 -#define V3D_HIDDEN_REV 0 -#define V3D_COMPAT_REV 0 -#include "v3d_hw_auto.h" - -extern "C" { - -struct v3d_hw *v3d_hw_auto_new(void *in_params) -{ - return v3d_hw_auto_make_unique().release(); -} - - -uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p) -{ - return hw->get_mem(size, p); -} - -bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size) -{ - return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS; -} - -bool v3d_hw_has_gca(struct v3d_hw *hw) -{ - return hw->has_gca(); -} - -uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg) -{ - return hw->read_reg(reg); -} - -void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val) -{ - hw->write_reg(reg, val); -} - -void v3d_hw_tick(struct v3d_hw *hw) -{ - return hw->tick(); -} - -int v3d_hw_get_version(struct v3d_hw *hw) -{ - const V3D_HUB_IDENT_T *ident = hw->get_hub_ident(); - - return ident->tech_version * 10 + ident->revision; -} - -} - -#endif /* USE_V3D_SIMULATOR */ diff --git a/src/gallium/drivers/vc5/vc5_simulator_wrapper.h b/src/gallium/drivers/vc5/vc5_simulator_wrapper.h deleted file mode 100644 index 8b5dca15ed9..00000000000 --- a/src/gallium/drivers/vc5/vc5_simulator_wrapper.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright © 2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies 
or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <stdint.h> -#include <stdbool.h> - -struct v3d_hw; - -#ifdef __cplusplus -extern "C" { -#endif - -struct v3d_hw *v3d_hw_auto_new(void *params); -uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p); -bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size); -bool v3d_hw_has_gca(struct v3d_hw *hw); -uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg); -void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); -void v3d_hw_tick(struct v3d_hw *hw); -int v3d_hw_get_version(struct v3d_hw *hw); - -#ifdef __cplusplus -} -#endif diff --git a/src/gallium/drivers/vc5/vc5_state.c b/src/gallium/drivers/vc5/vc5_state.c deleted file mode 100644 index 0ed0acd16ac..00000000000 --- a/src/gallium/drivers/vc5/vc5_state.c +++ /dev/null @@ -1,951 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial 
portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "pipe/p_state.h" -#include "util/u_format.h" -#include "util/u_framebuffer.h" -#include "util/u_inlines.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_half.h" -#include "util/u_helpers.h" - -#include "vc5_context.h" -#include "vc5_tiling.h" -#include "broadcom/common/v3d_macros.h" -#include "broadcom/cle/v3dx_pack.h" - -static void * -vc5_generic_cso_state_create(const void *src, uint32_t size) -{ - void *dst = calloc(1, size); - if (!dst) - return NULL; - memcpy(dst, src, size); - return dst; -} - -static void -vc5_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso) -{ - free(hwcso); -} - -static void -vc5_set_blend_color(struct pipe_context *pctx, - const struct pipe_blend_color *blend_color) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->blend_color.f = *blend_color; - for (int i = 0; i < 4; i++) { - vc5->blend_color.hf[i] = - util_float_to_half(blend_color->color[i]); - } - vc5->dirty |= VC5_DIRTY_BLEND_COLOR; -} - -static void -vc5_set_stencil_ref(struct pipe_context *pctx, - const struct pipe_stencil_ref *stencil_ref) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->stencil_ref = *stencil_ref; - vc5->dirty |= VC5_DIRTY_STENCIL_REF; -} - -static void -vc5_set_clip_state(struct pipe_context *pctx, - const struct pipe_clip_state *clip) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->clip = *clip; - vc5->dirty |= VC5_DIRTY_CLIP; -} - -static void 
-vc5_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); - vc5->dirty |= VC5_DIRTY_SAMPLE_MASK; -} - -static uint16_t -float_to_187_half(float f) -{ - return fui(f) >> 16; -} - -static void * -vc5_create_rasterizer_state(struct pipe_context *pctx, - const struct pipe_rasterizer_state *cso) -{ - struct vc5_rasterizer_state *so; - - so = CALLOC_STRUCT(vc5_rasterizer_state); - if (!so) - return NULL; - - so->base = *cso; - - /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, - * BCM21553). - */ - so->point_size = MAX2(cso->point_size, .125f); - - if (cso->offset_tri) { - so->offset_units = float_to_187_half(cso->offset_units); - so->offset_factor = float_to_187_half(cso->offset_scale); - } - - return so; -} - -/* Blend state is baked into shaders. */ -static void * -vc5_create_blend_state(struct pipe_context *pctx, - const struct pipe_blend_state *cso) -{ - return vc5_generic_cso_state_create(cso, sizeof(*cso)); -} - -static uint32_t -translate_stencil_op(enum pipe_stencil_op op) -{ - switch (op) { - case PIPE_STENCIL_OP_KEEP: return V3D_STENCIL_OP_KEEP; - case PIPE_STENCIL_OP_ZERO: return V3D_STENCIL_OP_ZERO; - case PIPE_STENCIL_OP_REPLACE: return V3D_STENCIL_OP_REPLACE; - case PIPE_STENCIL_OP_INCR: return V3D_STENCIL_OP_INCR; - case PIPE_STENCIL_OP_DECR: return V3D_STENCIL_OP_DECR; - case PIPE_STENCIL_OP_INCR_WRAP: return V3D_STENCIL_OP_INCWRAP; - case PIPE_STENCIL_OP_DECR_WRAP: return V3D_STENCIL_OP_DECWRAP; - case PIPE_STENCIL_OP_INVERT: return V3D_STENCIL_OP_INVERT; - } - unreachable("bad stencil op"); -} - -static void * -vc5_create_depth_stencil_alpha_state(struct pipe_context *pctx, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct vc5_depth_stencil_alpha_state *so; - - so = CALLOC_STRUCT(vc5_depth_stencil_alpha_state); - if (!so) - return NULL; - - so->base = *cso; - - if (cso->depth.enabled) { - 
switch (cso->depth.func) { - case PIPE_FUNC_LESS: - case PIPE_FUNC_LEQUAL: - so->ez_state = VC5_EZ_LT_LE; - break; - case PIPE_FUNC_GREATER: - case PIPE_FUNC_GEQUAL: - so->ez_state = VC5_EZ_GT_GE; - break; - case PIPE_FUNC_NEVER: - case PIPE_FUNC_EQUAL: - so->ez_state = VC5_EZ_UNDECIDED; - break; - default: - so->ez_state = VC5_EZ_DISABLED; - break; - } - - /* If stencil is enabled and it's not a no-op, then it would - * break EZ updates. - */ - if (cso->stencil[0].enabled && - (cso->stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP || - cso->stencil[0].func != PIPE_FUNC_ALWAYS || - (cso->stencil[1].enabled && - (cso->stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP && - cso->stencil[1].func != PIPE_FUNC_ALWAYS)))) { - so->ez_state = VC5_EZ_DISABLED; - } - } - - const struct pipe_stencil_state *front = &cso->stencil[0]; - const struct pipe_stencil_state *back = &cso->stencil[1]; - - if (front->enabled) { - v3dx_pack(&so->stencil_front, STENCIL_CONFIG, config) { - config.front_config = true; - /* If !back->enabled, then the front values should be - * used for both front and back-facing primitives. 
- */ - config.back_config = !back->enabled; - - config.stencil_write_mask = front->writemask; - config.stencil_test_mask = front->valuemask; - - config.stencil_test_function = front->func; - config.stencil_pass_op = - translate_stencil_op(front->zpass_op); - config.depth_test_fail_op = - translate_stencil_op(front->zfail_op); - config.stencil_test_fail_op = - translate_stencil_op(front->fail_op); - } - } - if (back->enabled) { - v3dx_pack(&so->stencil_back, STENCIL_CONFIG, config) { - config.front_config = false; - config.back_config = true; - - config.stencil_write_mask = back->writemask; - config.stencil_test_mask = back->valuemask; - - config.stencil_test_function = back->func; - config.stencil_pass_op = - translate_stencil_op(back->zpass_op); - config.depth_test_fail_op = - translate_stencil_op(back->zfail_op); - config.stencil_test_fail_op = - translate_stencil_op(back->fail_op); - } - } - - return so; -} - -static void -vc5_set_polygon_stipple(struct pipe_context *pctx, - const struct pipe_poly_stipple *stipple) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->stipple = *stipple; - vc5->dirty |= VC5_DIRTY_STIPPLE; -} - -static void -vc5_set_scissor_states(struct pipe_context *pctx, - unsigned start_slot, - unsigned num_scissors, - const struct pipe_scissor_state *scissor) -{ - struct vc5_context *vc5 = vc5_context(pctx); - - vc5->scissor = *scissor; - vc5->dirty |= VC5_DIRTY_SCISSOR; -} - -static void -vc5_set_viewport_states(struct pipe_context *pctx, - unsigned start_slot, - unsigned num_viewports, - const struct pipe_viewport_state *viewport) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->viewport = *viewport; - vc5->dirty |= VC5_DIRTY_VIEWPORT; -} - -static void -vc5_set_vertex_buffers(struct pipe_context *pctx, - unsigned start_slot, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_vertexbuf_stateobj *so = &vc5->vertexbuf; - - util_set_vertex_buffers_mask(so->vb, 
&so->enabled_mask, vb, - start_slot, count); - so->count = util_last_bit(so->enabled_mask); - - vc5->dirty |= VC5_DIRTY_VTXBUF; -} - -static void -vc5_blend_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->blend = hwcso; - vc5->dirty |= VC5_DIRTY_BLEND; -} - -static void -vc5_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->rasterizer = hwcso; - vc5->dirty |= VC5_DIRTY_RASTERIZER; -} - -static void -vc5_zsa_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->zsa = hwcso; - vc5->dirty |= VC5_DIRTY_ZSA; -} - -static void * -vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj); - - if (!so) - return NULL; - - memcpy(so->pipe, elements, sizeof(*elements) * num_elements); - so->num_elements = num_elements; - - for (int i = 0; i < so->num_elements; i++) { - const struct pipe_vertex_element *elem = &elements[i]; - const struct util_format_description *desc = - util_format_description(elem->src_format); - uint32_t r_size = desc->channel[0].size; - - const uint32_t size = - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - - v3dx_pack(&so->attrs[i * size], - GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { - /* vec_size == 0 means 4 */ - attr.vec_size = desc->nr_channels & 3; - attr.signed_int_type = (desc->channel[0].type == - UTIL_FORMAT_TYPE_SIGNED); - - attr.normalized_int_type = desc->channel[0].normalized; - attr.read_as_int_uint = desc->channel[0].pure_integer; - attr.instance_divisor = MIN2(elem->instance_divisor, - 0xffff); - - switch (desc->channel[0].type) { - case UTIL_FORMAT_TYPE_FLOAT: - if (r_size == 32) { - attr.type = ATTRIBUTE_FLOAT; - } else { - assert(r_size == 16); - attr.type = 
ATTRIBUTE_HALF_FLOAT; - } - break; - - case UTIL_FORMAT_TYPE_SIGNED: - case UTIL_FORMAT_TYPE_UNSIGNED: - switch (r_size) { - case 32: - attr.type = ATTRIBUTE_INT; - break; - case 16: - attr.type = ATTRIBUTE_SHORT; - break; - case 10: - attr.type = ATTRIBUTE_INT2_10_10_10; - break; - case 8: - attr.type = ATTRIBUTE_BYTE; - break; - default: - fprintf(stderr, - "format %s unsupported\n", - desc->name); - attr.type = ATTRIBUTE_BYTE; - abort(); - } - break; - - default: - fprintf(stderr, - "format %s unsupported\n", - desc->name); - abort(); - } - } - } - - /* Set up the default attribute values in case any of the vertex - * elements use them. - */ - so->default_attribute_values = vc5_bo_alloc(vc5->screen, - VC5_MAX_ATTRIBUTES * - 4 * sizeof(float), - "default attributes"); - uint32_t *attrs = vc5_bo_map(so->default_attribute_values); - for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { - attrs[i * 4 + 0] = 0; - attrs[i * 4 + 1] = 0; - attrs[i * 4 + 2] = 0; - if (i < so->num_elements && - util_format_is_pure_integer(so->pipe[i].src_format)) { - attrs[i * 4 + 3] = 1; - } else { - attrs[i * 4 + 3] = fui(1.0); - } - } - - return so; -} - -static void -vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - vc5->vtx = hwcso; - vc5->dirty |= VC5_DIRTY_VTXSTATE; -} - -static void -vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index, - const struct pipe_constant_buffer *cb) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader]; - - util_copy_constant_buffer(&so->cb[index], cb); - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. 
- */ - if (unlikely(!cb)) { - so->enabled_mask &= ~(1 << index); - so->dirty_mask &= ~(1 << index); - return; - } - - so->enabled_mask |= 1 << index; - so->dirty_mask |= 1 << index; - vc5->dirty |= VC5_DIRTY_CONSTBUF; -} - -static void -vc5_set_framebuffer_state(struct pipe_context *pctx, - const struct pipe_framebuffer_state *framebuffer) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct pipe_framebuffer_state *cso = &vc5->framebuffer; - - vc5->job = NULL; - - util_copy_framebuffer_state(cso, framebuffer); - - vc5->swap_color_rb = 0; - vc5->blend_dst_alpha_one = 0; - for (int i = 0; i < vc5->framebuffer.nr_cbufs; i++) { - struct pipe_surface *cbuf = vc5->framebuffer.cbufs[i]; - if (!cbuf) - continue; - - const struct util_format_description *desc = - util_format_description(cbuf->format); - - /* For BGRA8 formats (DRI window system default format), we - * need to swap R and B, since the HW's format is RGBA8. - */ - if (desc->swizzle[0] == PIPE_SWIZZLE_Z && - cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) { - vc5->swap_color_rb |= 1 << i; - } - - if (desc->swizzle[3] == PIPE_SWIZZLE_1) - vc5->blend_dst_alpha_one |= 1 << i; - } - - vc5->dirty |= VC5_DIRTY_FRAMEBUFFER; -} - -static struct vc5_texture_stateobj * -vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader) -{ - switch (shader) { - case PIPE_SHADER_FRAGMENT: - vc5->dirty |= VC5_DIRTY_FRAGTEX; - return &vc5->fragtex; - break; - case PIPE_SHADER_VERTEX: - vc5->dirty |= VC5_DIRTY_VERTTEX; - return &vc5->verttex; - break; - default: - fprintf(stderr, "Unknown shader target %d\n", shader); - abort(); - } -} - -static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) -{ - switch (pipe_wrap) { - case PIPE_TEX_WRAP_REPEAT: - return 0; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return 1; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return 2; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return 3; - case PIPE_TEX_WRAP_CLAMP: - return (using_nearest ? 
1 : 3); - default: - unreachable("Unknown wrap mode"); - } -} - - -static void * -vc5_create_sampler_state(struct pipe_context *pctx, - const struct pipe_sampler_state *cso) -{ - MAYBE_UNUSED struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state); - - if (!so) - return NULL; - - memcpy(so, cso, sizeof(*cso)); - - bool either_nearest = - (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || - cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - -#if V3D_VERSION >= 40 - so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), - "sampler"); - void *map = vc5_bo_map(so->bo); - - v3dx_pack(map, SAMPLER_STATE, sampler) { - sampler.wrap_i_border = false; - - sampler.wrap_s = translate_wrap(cso->wrap_s, either_nearest); - sampler.wrap_t = translate_wrap(cso->wrap_t, either_nearest); - sampler.wrap_r = translate_wrap(cso->wrap_r, either_nearest); - - sampler.fixed_bias = cso->lod_bias; - sampler.depth_compare_function = cso->compare_func; - - sampler.min_filter_nearest = - cso->min_img_filter == PIPE_TEX_FILTER_NEAREST; - sampler.mag_filter_nearest = - cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST; - sampler.mip_filter_nearest = - cso->min_mip_filter != PIPE_TEX_MIPFILTER_LINEAR; - - sampler.min_level_of_detail = MIN2(MAX2(0, cso->min_lod), - 15); - sampler.max_level_of_detail = MIN2(cso->max_lod, 15); - - if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - sampler.min_level_of_detail = 0; - sampler.max_level_of_detail = 0; - } - - if (cso->max_anisotropy) { - sampler.anisotropy_enable = true; - - if (cso->max_anisotropy > 8) - sampler.maximum_anisotropy = 3; - else if (cso->max_anisotropy > 4) - sampler.maximum_anisotropy = 2; - else if (cso->max_anisotropy > 2) - sampler.maximum_anisotropy = 1; - } - - sampler.border_colour_mode = V3D_BORDER_COLOUR_FOLLOWS; - /* XXX: The border colour field is in the TMU blending format - * (32, f16, or i16), and we need to customize it based on - * that. 
- * - * XXX: for compat alpha formats, we need the alpha field to - * be in the red channel. - */ - sampler.border_colour_red = - util_float_to_half(cso->border_color.f[0]); - sampler.border_colour_green = - util_float_to_half(cso->border_color.f[1]); - sampler.border_colour_blue = - util_float_to_half(cso->border_color.f[2]); - sampler.border_colour_alpha = - util_float_to_half(cso->border_color.f[3]); - } - -#else /* V3D_VERSION < 40 */ - v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { - p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); - p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); - p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); - } - - v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { - tex.depth_compare_function = cso->compare_func; - tex.fixed_bias = cso->lod_bias; - } -#endif /* V3D_VERSION < 40 */ - return so; -} - -static void -vc5_sampler_states_bind(struct pipe_context *pctx, - enum pipe_shader_type shader, unsigned start, - unsigned nr, void **hwcso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); - - assert(start == 0); - unsigned i; - unsigned new_nr = 0; - - for (i = 0; i < nr; i++) { - if (hwcso[i]) - new_nr = i + 1; - stage_tex->samplers[i] = hwcso[i]; - } - - for (; i < stage_tex->num_samplers; i++) { - stage_tex->samplers[i] = NULL; - } - - stage_tex->num_samplers = new_nr; -} - -static void -vc5_sampler_state_delete(struct pipe_context *pctx, - void *hwcso) -{ - struct pipe_sampler_state *psampler = hwcso; - struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); - - vc5_bo_unreference(&sampler->bo); - free(psampler); -} - -#if V3D_VERSION >= 40 -static uint32_t -translate_swizzle(unsigned char pipe_swizzle) -{ - switch (pipe_swizzle) { - case PIPE_SWIZZLE_0: - return 0; - case PIPE_SWIZZLE_1: - return 1; - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - case 
PIPE_SWIZZLE_W: - return 2 + pipe_swizzle; - default: - unreachable("unknown swizzle"); - } -} -#endif - -static struct pipe_sampler_view * -vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, - const struct pipe_sampler_view *cso) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_screen *screen = vc5->screen; - struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view); - struct vc5_resource *rsc = vc5_resource(prsc); - - if (!so) - return NULL; - - so->base = *cso; - - pipe_reference(NULL, &prsc->reference); - - /* Compute the sampler view's swizzle up front. This will be plugged - * into either the sampler (for 16-bit returns) or the shader's - * texture key (for 32) - */ - uint8_t view_swizzle[4] = { - cso->swizzle_r, - cso->swizzle_g, - cso->swizzle_b, - cso->swizzle_a - }; - const uint8_t *fmt_swizzle = - vc5_get_format_swizzle(&screen->devinfo, so->base.format); - util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle); - - so->base.texture = prsc; - so->base.reference.count = 1; - so->base.context = pctx; - - int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; - -#if V3D_VERSION >= 40 - so->bo = vc5_bo_alloc(vc5->screen, cl_packet_length(SAMPLER_STATE), - "sampler"); - void *map = vc5_bo_map(so->bo); - - v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { -#else /* V3D_VERSION < 40 */ - v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { -#endif - - tex.image_width = prsc->width0 * msaa_scale; - tex.image_height = prsc->height0 * msaa_scale; - -#if V3D_VERSION >= 40 - /* On 4.x, the height of a 1D texture is redefined to be the - * upper 14 bits of the width (which is only usable with txf). 
- */ - if (prsc->target == PIPE_TEXTURE_1D || - prsc->target == PIPE_TEXTURE_1D_ARRAY) { - tex.image_height = tex.image_width >> 14; - } -#endif - - if (prsc->target == PIPE_TEXTURE_3D) { - tex.image_depth = prsc->depth0; - } else { - tex.image_depth = (cso->u.tex.last_layer - - cso->u.tex.first_layer) + 1; - } - - tex.srgb = util_format_is_srgb(cso->format); - - tex.base_level = cso->u.tex.first_level; -#if V3D_VERSION >= 40 - tex.max_level = cso->u.tex.last_level; - /* Note that we don't have a job to reference the texture's sBO - * at state create time, so any time this sampler view is used - * we need to add the texture to the job. - */ - tex.texture_base_pointer = cl_address(NULL, - rsc->bo->offset + - rsc->slices[0].offset), - - tex.swizzle_r = translate_swizzle(so->swizzle[0]); - tex.swizzle_g = translate_swizzle(so->swizzle[1]); - tex.swizzle_b = translate_swizzle(so->swizzle[2]); - tex.swizzle_a = translate_swizzle(so->swizzle[3]); -#endif - tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; - - if (prsc->nr_samples > 1 && V3D_VERSION < 40) { - /* Using texture views to reinterpret formats on our - * MSAA textures won't work, because we don't lay out - * the bits in memory as it's expected -- for example, - * RGBA8 and RGB10_A2 are compatible in the - * ARB_texture_view spec, but in HW we lay them out as - * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now - * to catch failures. - * - * We explicitly allow remapping S8Z24 to RGBA8888 for - * vc5_blit.c's stencil blits. 
- */ - assert((util_format_linear(cso->format) == - util_format_linear(prsc->format)) || - (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM && - cso->format == PIPE_FORMAT_R8G8B8A8_UNORM)); - uint32_t output_image_format = - vc5_get_rt_format(&screen->devinfo, cso->format); - uint32_t internal_type; - uint32_t internal_bpp; - vc5_get_internal_type_bpp_for_output_format(&screen->devinfo, - output_image_format, - &internal_type, - &internal_bpp); - - switch (internal_type) { - case V3D_INTERNAL_TYPE_8: - tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8; - break; - case V3D_INTERNAL_TYPE_16F: - tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F; - break; - default: - unreachable("Bad MSAA texture type"); - } - - /* sRGB was stored in the tile buffer as linear and - * would have been encoded to sRGB on resolved tile - * buffer store. Note that this means we would need - * shader code if we wanted to read an MSAA sRGB - * texture without sRGB decode. - */ - tex.srgb = false; - } else { - tex.texture_type = vc5_get_tex_format(&screen->devinfo, - cso->format); - } - - /* Since other platform devices may produce UIF images even - * when they're not big enough for V3D to assume they're UIF, - * we force images with level 0 as UIF to be always treated - * that way. 
- */ - tex.level_0_is_strictly_uif = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR || - rsc->slices[0].tiling == - VC5_TILING_UIF_NO_XOR); - tex.level_0_xor_enable = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR); - - if (tex.level_0_is_strictly_uif) - tex.level_0_ub_pad = rsc->slices[0].ub_pad; - -#if V3D_VERSION >= 40 - if (tex.uif_xor_disable || - tex.level_0_is_strictly_uif) { - tex.extended = true; - } -#endif /* V3D_VERSION >= 40 */ - }; - - return &so->base; -} - -static void -vc5_sampler_view_destroy(struct pipe_context *pctx, - struct pipe_sampler_view *psview) -{ - struct vc5_sampler_view *sview = vc5_sampler_view(psview); - - vc5_bo_unreference(&sview->bo); - pipe_resource_reference(&psview->texture, NULL); - free(psview); -} - -static void -vc5_set_sampler_views(struct pipe_context *pctx, - enum pipe_shader_type shader, - unsigned start, unsigned nr, - struct pipe_sampler_view **views) -{ - struct vc5_context *vc5 = vc5_context(pctx); - struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader); - unsigned i; - unsigned new_nr = 0; - - assert(start == 0); - - for (i = 0; i < nr; i++) { - if (views[i]) - new_nr = i + 1; - pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); - } - - for (; i < stage_tex->num_textures; i++) { - pipe_sampler_view_reference(&stage_tex->textures[i], NULL); - } - - stage_tex->num_textures = new_nr; -} - -static struct pipe_stream_output_target * -vc5_create_stream_output_target(struct pipe_context *pctx, - struct pipe_resource *prsc, - unsigned buffer_offset, - unsigned buffer_size) -{ - struct pipe_stream_output_target *target; - - target = CALLOC_STRUCT(pipe_stream_output_target); - if (!target) - return NULL; - - pipe_reference_init(&target->reference, 1); - pipe_resource_reference(&target->buffer, prsc); - - target->context = pctx; - target->buffer_offset = buffer_offset; - target->buffer_size = buffer_size; - - return target; -} - -static void -vc5_stream_output_target_destroy(struct pipe_context 
*pctx, - struct pipe_stream_output_target *target) -{ - pipe_resource_reference(&target->buffer, NULL); - free(target); -} - -static void -vc5_set_stream_output_targets(struct pipe_context *pctx, - unsigned num_targets, - struct pipe_stream_output_target **targets, - const unsigned *offsets) -{ - struct vc5_context *ctx = vc5_context(pctx); - struct vc5_streamout_stateobj *so = &ctx->streamout; - unsigned i; - - assert(num_targets <= ARRAY_SIZE(so->targets)); - - for (i = 0; i < num_targets; i++) - pipe_so_target_reference(&so->targets[i], targets[i]); - - for (; i < so->num_targets; i++) - pipe_so_target_reference(&so->targets[i], NULL); - - so->num_targets = num_targets; - - ctx->dirty |= VC5_DIRTY_STREAMOUT; -} - -void -v3dX(state_init)(struct pipe_context *pctx) -{ - pctx->set_blend_color = vc5_set_blend_color; - pctx->set_stencil_ref = vc5_set_stencil_ref; - pctx->set_clip_state = vc5_set_clip_state; - pctx->set_sample_mask = vc5_set_sample_mask; - pctx->set_constant_buffer = vc5_set_constant_buffer; - pctx->set_framebuffer_state = vc5_set_framebuffer_state; - pctx->set_polygon_stipple = vc5_set_polygon_stipple; - pctx->set_scissor_states = vc5_set_scissor_states; - pctx->set_viewport_states = vc5_set_viewport_states; - - pctx->set_vertex_buffers = vc5_set_vertex_buffers; - - pctx->create_blend_state = vc5_create_blend_state; - pctx->bind_blend_state = vc5_blend_state_bind; - pctx->delete_blend_state = vc5_generic_cso_state_delete; - - pctx->create_rasterizer_state = vc5_create_rasterizer_state; - pctx->bind_rasterizer_state = vc5_rasterizer_state_bind; - pctx->delete_rasterizer_state = vc5_generic_cso_state_delete; - - pctx->create_depth_stencil_alpha_state = vc5_create_depth_stencil_alpha_state; - pctx->bind_depth_stencil_alpha_state = vc5_zsa_state_bind; - pctx->delete_depth_stencil_alpha_state = vc5_generic_cso_state_delete; - - pctx->create_vertex_elements_state = vc5_vertex_state_create; - pctx->delete_vertex_elements_state = 
vc5_generic_cso_state_delete; - pctx->bind_vertex_elements_state = vc5_vertex_state_bind; - - pctx->create_sampler_state = vc5_create_sampler_state; - pctx->delete_sampler_state = vc5_sampler_state_delete; - pctx->bind_sampler_states = vc5_sampler_states_bind; - - pctx->create_sampler_view = vc5_create_sampler_view; - pctx->sampler_view_destroy = vc5_sampler_view_destroy; - pctx->set_sampler_views = vc5_set_sampler_views; - - pctx->create_stream_output_target = vc5_create_stream_output_target; - pctx->stream_output_target_destroy = vc5_stream_output_target_destroy; - pctx->set_stream_output_targets = vc5_set_stream_output_targets; -} diff --git a/src/gallium/drivers/vc5/vc5_tiling.c b/src/gallium/drivers/vc5/vc5_tiling.c deleted file mode 100644 index cbd86d5566a..00000000000 --- a/src/gallium/drivers/vc5/vc5_tiling.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file vc5_tiling.c - * - * Handles information about the VC5 tiling formats, and loading and storing - * from them. - */ - -#include -#include "vc5_screen.h" -#include "vc5_context.h" -#include "vc5_tiling.h" - -/** Return the width in pixels of a 64-byte microtile. */ -uint32_t -vc5_utile_width(int cpp) -{ - switch (cpp) { - case 1: - case 2: - return 8; - case 4: - case 8: - return 4; - case 16: - return 2; - default: - unreachable("unknown cpp"); - } -} - -/** Return the height in pixels of a 64-byte microtile. */ -uint32_t -vc5_utile_height(int cpp) -{ - switch (cpp) { - case 1: - return 8; - case 2: - case 4: - return 4; - case 8: - case 16: - return 2; - default: - unreachable("unknown cpp"); - } -} - -/** - * Returns the byte address for a given pixel within a utile. - * - * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 - * arrangement. - */ -static inline uint32_t -vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) -{ - uint32_t utile_w = vc5_utile_width(cpp); - uint32_t utile_h = vc5_utile_height(cpp); - - assert(x < utile_w && y < utile_h); - - return x * cpp + y * utile_w * cpp; -} - -/** - * Returns the byte offset for a given pixel in a LINEARTILE layout. - * - * LINEARTILE is a single line of utiles in either the X or Y direction. 
- */ -static inline uint32_t -vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) -{ - uint32_t utile_w = vc5_utile_width(cpp); - uint32_t utile_h = vc5_utile_height(cpp); - uint32_t utile_index_x = x / utile_w; - uint32_t utile_index_y = y / utile_h; - - assert(utile_index_x == 0 || utile_index_y == 0); - - return (64 * (utile_index_x + utile_index_y) + - vc5_get_utile_pixel_offset(cpp, - x & (utile_w - 1), - y & (utile_h - 1))); -} - -/** - * Returns the byte offset for a given pixel in a UBLINEAR layout. - * - * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 - * utiles), and the UIF blocks are in 1 or 2 columns in raster order. - */ -static inline uint32_t -vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, - int ublinear_number) -{ - uint32_t utile_w = vc5_utile_width(cpp); - uint32_t utile_h = vc5_utile_height(cpp); - uint32_t ub_w = utile_w * 2; - uint32_t ub_h = utile_h * 2; - uint32_t ub_x = x / ub_w; - uint32_t ub_y = y / ub_h; - - return (256 * (ub_y * ublinear_number + - ub_x) + - ((x & utile_w) ? 64 : 0) + - ((y & utile_h) ? 128 : 0) + - + vc5_get_utile_pixel_offset(cpp, - x & (utile_w - 1), - y & (utile_h - 1))); -} - -static inline uint32_t -vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, - uint32_t x, uint32_t y) -{ - return vc5_get_ublinear_pixel_offset(cpp, x, y, 2); -} - -static inline uint32_t -vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, - uint32_t x, uint32_t y) -{ - return vc5_get_ublinear_pixel_offset(cpp, x, y, 1); -} - -/** - * Returns the byte offset for a given pixel in a UIF layout. - * - * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. - * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in - * 4x4 groups, and those 4x4 groups are then stored in raster order. 
- */ -static inline uint32_t -vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, - bool do_xor) -{ - uint32_t utile_w = vc5_utile_width(cpp); - uint32_t utile_h = vc5_utile_height(cpp); - uint32_t mb_width = utile_w * 2; - uint32_t mb_height = utile_h * 2; - uint32_t log2_mb_width = ffs(mb_width) - 1; - uint32_t log2_mb_height = ffs(mb_height) - 1; - - /* Macroblock X, y */ - uint32_t mb_x = x >> log2_mb_width; - uint32_t mb_y = y >> log2_mb_height; - /* X, y within the macroblock */ - uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); - uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); - - if (do_xor && (mb_x / 4) & 1) - mb_y ^= 0x10; - - uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; - uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; - - uint32_t mb_base_addr = mb_id * 256; - - bool top = mb_pixel_y < utile_h; - bool left = mb_pixel_x < utile_w; - - /* Docs have this in pixels, we do bytes here. */ - uint32_t mb_tile_offset = (!top * 128 + !left * 64); - - uint32_t utile_x = mb_pixel_x & (utile_w - 1); - uint32_t utile_y = mb_pixel_y & (utile_h - 1); - - uint32_t mb_pixel_address = (mb_base_addr + - mb_tile_offset + - vc5_get_utile_pixel_offset(cpp, - utile_x, - utile_y)); - - return mb_pixel_address; -} - -static inline uint32_t -vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, - uint32_t x, uint32_t y) -{ - return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true); -} - -static inline uint32_t -vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, - uint32_t x, uint32_t y) -{ - return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false); -} - -static inline void -vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, - void *cpu, uint32_t cpu_stride, - int cpp, uint32_t image_h, - const struct pipe_box *box, - uint32_t (*get_pixel_offset)(uint32_t cpp, - uint32_t image_h, - uint32_t x, uint32_t y), - bool is_load) -{ - for (uint32_t y = 0; y < box->height; 
y++) { - void *cpu_row = cpu + y * cpu_stride; - - for (int x = 0; x < box->width; x++) { - uint32_t pixel_offset = get_pixel_offset(cpp, image_h, - box->x + x, - box->y + y); - - if (false) { - fprintf(stderr, "%3d,%3d -> %d\n", - box->x + x, box->y + y, - pixel_offset); - } - - if (is_load) { - memcpy(cpu_row + x * cpp, - gpu + pixel_offset, - cpp); - } else { - memcpy(gpu + pixel_offset, - cpu_row + x * cpp, - cpp); - } - } - } -} - -static inline void -vc5_move_pixels_general(void *gpu, uint32_t gpu_stride, - void *cpu, uint32_t cpu_stride, - int cpp, uint32_t image_h, - const struct pipe_box *box, - uint32_t (*get_pixel_offset)(uint32_t cpp, - uint32_t image_h, - uint32_t x, uint32_t y), - bool is_load) -{ - switch (cpp) { - case 1: - vc5_move_pixels_general_percpp(gpu, gpu_stride, - cpu, cpu_stride, - 1, image_h, box, - get_pixel_offset, - is_load); - break; - case 2: - vc5_move_pixels_general_percpp(gpu, gpu_stride, - cpu, cpu_stride, - 2, image_h, box, - get_pixel_offset, - is_load); - break; - case 4: - vc5_move_pixels_general_percpp(gpu, gpu_stride, - cpu, cpu_stride, - 4, image_h, box, - get_pixel_offset, - is_load); - break; - case 8: - vc5_move_pixels_general_percpp(gpu, gpu_stride, - cpu, cpu_stride, - 8, image_h, box, - get_pixel_offset, - is_load); - break; - case 16: - vc5_move_pixels_general_percpp(gpu, gpu_stride, - cpu, cpu_stride, - 16, image_h, box, - get_pixel_offset, - is_load); - break; - } -} - -static inline void -vc5_move_tiled_image(void *gpu, uint32_t gpu_stride, - void *cpu, uint32_t cpu_stride, - enum vc5_tiling_mode tiling_format, - int cpp, - uint32_t image_h, - const struct pipe_box *box, - bool is_load) -{ - switch (tiling_format) { - case VC5_TILING_UIF_XOR: - vc5_move_pixels_general(gpu, gpu_stride, - cpu, cpu_stride, - cpp, image_h, box, - vc5_get_uif_xor_pixel_offset, - is_load); - break; - case VC5_TILING_UIF_NO_XOR: - vc5_move_pixels_general(gpu, gpu_stride, - cpu, cpu_stride, - cpp, image_h, box, - 
vc5_get_uif_no_xor_pixel_offset, - is_load); - break; - case VC5_TILING_UBLINEAR_2_COLUMN: - vc5_move_pixels_general(gpu, gpu_stride, - cpu, cpu_stride, - cpp, image_h, box, - vc5_get_ublinear_2_column_pixel_offset, - is_load); - break; - case VC5_TILING_UBLINEAR_1_COLUMN: - vc5_move_pixels_general(gpu, gpu_stride, - cpu, cpu_stride, - cpp, image_h, box, - vc5_get_ublinear_1_column_pixel_offset, - is_load); - break; - case VC5_TILING_LINEARTILE: - vc5_move_pixels_general(gpu, gpu_stride, - cpu, cpu_stride, - cpp, image_h, box, - vc5_get_lt_pixel_offset, - is_load); - break; - default: - unreachable("Unsupported tiling format"); - break; - } -} - -/** - * Loads pixel data from the start (microtile-aligned) box in \p src to the - * start of \p dst according to the given tiling format. - */ -void -vc5_load_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum vc5_tiling_mode tiling_format, int cpp, - uint32_t image_h, - const struct pipe_box *box) -{ - vc5_move_tiled_image(src, src_stride, - dst, dst_stride, - tiling_format, - cpp, - image_h, - box, - true); -} - -/** - * Stores pixel data from the start of \p src into a (microtile-aligned) box in - * \p dst according to the given tiling format. 
- */ -void -vc5_store_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum vc5_tiling_mode tiling_format, int cpp, - uint32_t image_h, - const struct pipe_box *box) -{ - vc5_move_tiled_image(dst, dst_stride, - src, src_stride, - tiling_format, - cpp, - image_h, - box, - false); -} diff --git a/src/gallium/drivers/vc5/vc5_tiling.h b/src/gallium/drivers/vc5/vc5_tiling.h deleted file mode 100644 index d3cf48c4527..00000000000 --- a/src/gallium/drivers/vc5/vc5_tiling.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef VC5_TILING_H -#define VC5_TILING_H - -uint32_t vc5_utile_width(int cpp) ATTRIBUTE_CONST; -uint32_t vc5_utile_height(int cpp) ATTRIBUTE_CONST; -bool vc5_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; -void vc5_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); -void vc5_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); -void vc5_load_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum vc5_tiling_mode tiling_format, int cpp, - uint32_t image_h, - const struct pipe_box *box); -void vc5_store_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum vc5_tiling_mode tiling_format, int cpp, - uint32_t image_h, - const struct pipe_box *box); - -#endif /* VC5_TILING_H */ diff --git a/src/gallium/drivers/vc5/vc5_uniforms.c b/src/gallium/drivers/vc5/vc5_uniforms.c deleted file mode 100644 index 03b6d8381c1..00000000000 --- a/src/gallium/drivers/vc5/vc5_uniforms.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright © 2014-2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/u_pack_color.h" -#include "util/format_srgb.h" - -#include "vc5_context.h" -#include "compiler/v3d_compiler.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" - -#if 0 - -#define SWIZ(x,y,z,w) { \ - PIPE_SWIZZLE_##x, \ - PIPE_SWIZZLE_##y, \ - PIPE_SWIZZLE_##z, \ - PIPE_SWIZZLE_##w \ -} - -static void -write_texture_border_color(struct vc5_job *job, - struct vc5_cl_out **uniforms, - struct vc5_texture_stateobj *texstate, - uint32_t unit) -{ - struct pipe_sampler_state *sampler = texstate->samplers[unit]; - struct pipe_sampler_view *texture = texstate->textures[unit]; - struct vc5_resource *rsc = vc5_resource(texture->texture); - union util_color uc; - - const struct util_format_description *tex_format_desc = - util_format_description(texture->format); - - float border_color[4]; - for (int i = 0; i < 4; i++) - border_color[i] = sampler->border_color.f[i]; - if (util_format_is_srgb(texture->format)) { - for (int i = 0; i < 3; i++) - border_color[i] = - util_format_linear_to_srgb_float(border_color[i]); - } - - /* Turn the border color into the layout of channels that it would - * have when stored as texture contents. - */ - float storage_color[4]; - util_format_unswizzle_4f(storage_color, - border_color, - tex_format_desc->swizzle); - - /* Now, pack so that when the vc5_format-sampled texture contents are - * replaced with our border color, the vc5_get_format_swizzle() - * swizzling will get the right channels. 
- */ - if (util_format_is_depth_or_stencil(texture->format)) { - uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, - sampler->border_color.f[0]) << 8; - } else { - switch (rsc->vc5_format) { - default: - case VC5_TEXTURE_TYPE_RGBA8888: - util_pack_color(storage_color, - PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGBA4444: - util_pack_color(storage_color, - PIPE_FORMAT_A8B8G8R8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGB565: - util_pack_color(storage_color, - PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_ALPHA: - uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; - break; - case VC5_TEXTURE_TYPE_LUMALPHA: - uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | - (float_to_ubyte(storage_color[0]) << 0)); - break; - } - } - - cl_aligned_u32(uniforms, uc.ui[0]); -} -#endif - -static uint32_t -get_texrect_scale(struct vc5_texture_stateobj *texstate, - enum quniform_contents contents, - uint32_t data) -{ - struct pipe_sampler_view *texture = texstate->textures[data]; - uint32_t dim; - - if (contents == QUNIFORM_TEXRECT_SCALE_X) - dim = texture->texture->width0; - else - dim = texture->texture->height0; - - return fui(1.0f / dim); -} - -static uint32_t -get_texture_size(struct vc5_texture_stateobj *texstate, - enum quniform_contents contents, - uint32_t data) -{ - struct pipe_sampler_view *texture = texstate->textures[data]; - - switch (contents) { - case QUNIFORM_TEXTURE_WIDTH: - return u_minify(texture->texture->width0, - texture->u.tex.first_level); - case QUNIFORM_TEXTURE_HEIGHT: - return u_minify(texture->texture->height0, - texture->u.tex.first_level); - case QUNIFORM_TEXTURE_DEPTH: - return u_minify(texture->texture->depth0, - texture->u.tex.first_level); - case QUNIFORM_TEXTURE_ARRAY_SIZE: - return texture->texture->array_size; - case QUNIFORM_TEXTURE_LEVELS: - return (texture->u.tex.last_level - - texture->u.tex.first_level) + 1; - default: - unreachable("Bad texture size field"); - } -} - -static struct vc5_bo * 
-vc5_upload_ubo(struct vc5_context *vc5, - struct vc5_compiled_shader *shader, - const uint32_t *gallium_uniforms) -{ - if (!shader->prog_data.base->ubo_size) - return NULL; - - struct vc5_bo *ubo = vc5_bo_alloc(vc5->screen, - shader->prog_data.base->ubo_size, - "ubo"); - void *data = vc5_bo_map(ubo); - for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) { - memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset, - ((const void *)gallium_uniforms + - shader->prog_data.base->ubo_ranges[i].src_offset), - shader->prog_data.base->ubo_ranges[i].size); - } - - return ubo; -} - -/** - * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter. - * - * Some bits of this field are dependent on the type of sample being done by - * the shader, while other bits are dependent on the sampler state. We OR the - * two together here. - */ -static void -write_texture_p0(struct vc5_job *job, - struct vc5_cl_out **uniforms, - struct vc5_texture_stateobj *texstate, - uint32_t unit, - uint32_t shader_data) -{ - struct pipe_sampler_state *psampler = texstate->samplers[unit]; - struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); - - cl_aligned_u32(uniforms, shader_data | sampler->p0); -} - -/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */ -static void -write_texture_p1(struct vc5_job *job, - struct vc5_cl_out **uniforms, - struct vc5_texture_stateobj *texstate, - uint32_t data) -{ - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. 
- */ - uint32_t unit = data >> 5; - uint32_t p1 = data & 0x1f; - - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct vc5_sampler_view *sview = vc5_sampler_view(psview); - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { - .texture_state_record_base_address = texstate->texture_state[unit], - }; - - uint32_t packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, - (uint8_t *)&packed, - &unpacked); - - cl_aligned_u32(uniforms, p1 | packed | sview->p1); -} - -/** Writes the V3D 4.x TMU configuration parameter 0. */ -static void -write_tmu_p0(struct vc5_job *job, - struct vc5_cl_out **uniforms, - struct vc5_texture_stateobj *texstate, - uint32_t data) -{ - /* Extract the texture unit from the top bits, and the compiler's - * packed p0 from the bottom. - */ - uint32_t unit = data >> 24; - uint32_t p0 = data & 0x00ffffff; - - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct vc5_sampler_view *sview = vc5_sampler_view(psview); - struct vc5_resource *rsc = vc5_resource(psview->texture); - - cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0); - vc5_job_add_bo(job, rsc->bo); -} - -/** Writes the V3D 4.x TMU configuration parameter 1. */ -static void -write_tmu_p1(struct vc5_job *job, - struct vc5_cl_out **uniforms, - struct vc5_texture_stateobj *texstate, - uint32_t data) -{ - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. 
- */ - uint32_t unit = data >> 24; - uint32_t p0 = data & 0x00ffffff; - - struct pipe_sampler_state *psampler = texstate->samplers[unit]; - struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); - - cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0); -} - -struct vc5_cl_reloc -vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader, - struct vc5_constbuf_stateobj *cb, - struct vc5_texture_stateobj *texstate) -{ - struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; - struct vc5_job *job = vc5->job; - const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; - struct vc5_bo *ubo = vc5_upload_ubo(vc5, shader, gallium_uniforms); - - /* We always need to return some space for uniforms, because the HW - * will be prefetching, even if we don't read any in the program. - */ - vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4); - - struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect); - vc5_bo_reference(uniform_stream.bo); - - struct vc5_cl_out *uniforms = - cl_start(&job->indirect); - - for (int i = 0; i < uinfo->count; i++) { - - switch (uinfo->contents[i]) { - case QUNIFORM_CONSTANT: - cl_aligned_u32(&uniforms, uinfo->data[i]); - break; - case QUNIFORM_UNIFORM: - cl_aligned_u32(&uniforms, - gallium_uniforms[uinfo->data[i]]); - break; - case QUNIFORM_VIEWPORT_X_SCALE: - cl_aligned_f(&uniforms, vc5->viewport.scale[0] * 256.0f); - break; - case QUNIFORM_VIEWPORT_Y_SCALE: - cl_aligned_f(&uniforms, vc5->viewport.scale[1] * 256.0f); - break; - - case QUNIFORM_VIEWPORT_Z_OFFSET: - cl_aligned_f(&uniforms, vc5->viewport.translate[2]); - break; - case QUNIFORM_VIEWPORT_Z_SCALE: - cl_aligned_f(&uniforms, vc5->viewport.scale[2]); - break; - - case QUNIFORM_USER_CLIP_PLANE: - cl_aligned_f(&uniforms, - vc5->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); - break; - - case QUNIFORM_TMU_CONFIG_P0: - write_tmu_p0(job, &uniforms, texstate, - uinfo->data[i]); - break; - - case QUNIFORM_TMU_CONFIG_P1: 
- write_tmu_p1(job, &uniforms, texstate, - uinfo->data[i]); - break; - - case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(job, &uniforms, texstate, - uinfo->data[i]); - break; - -#if 0 - case QUNIFORM_TEXTURE_FIRST_LEVEL: - write_texture_first_level(job, &uniforms, texstate, - uinfo->data[i]); - break; -#endif - - case QUNIFORM_TEXRECT_SCALE_X: - case QUNIFORM_TEXRECT_SCALE_Y: - cl_aligned_u32(&uniforms, - get_texrect_scale(texstate, - uinfo->contents[i], - uinfo->data[i])); - break; - - case QUNIFORM_TEXTURE_WIDTH: - case QUNIFORM_TEXTURE_HEIGHT: - case QUNIFORM_TEXTURE_DEPTH: - case QUNIFORM_TEXTURE_ARRAY_SIZE: - case QUNIFORM_TEXTURE_LEVELS: - cl_aligned_u32(&uniforms, - get_texture_size(texstate, - uinfo->contents[i], - uinfo->data[i])); - break; - - case QUNIFORM_STENCIL: - cl_aligned_u32(&uniforms, - vc5->zsa->stencil_uniforms[uinfo->data[i]] | - (uinfo->data[i] <= 1 ? - (vc5->stencil_ref.ref_value[uinfo->data[i]] << 8) : - 0)); - break; - - case QUNIFORM_ALPHA_REF: - cl_aligned_f(&uniforms, - vc5->zsa->base.alpha.ref_value); - break; - - case QUNIFORM_SAMPLE_MASK: - cl_aligned_u32(&uniforms, vc5->sample_mask); - break; - - case QUNIFORM_UBO_ADDR: - if (uinfo->data[i] == 0) { - cl_aligned_reloc(&job->indirect, &uniforms, - ubo, 0); - } else { - int ubo_index = uinfo->data[i]; - struct vc5_resource *rsc = - vc5_resource(cb->cb[ubo_index].buffer); - - cl_aligned_reloc(&job->indirect, &uniforms, - rsc->bo, - cb->cb[ubo_index].buffer_offset); - } - break; - - case QUNIFORM_TEXTURE_FIRST_LEVEL: - cl_aligned_f(&uniforms, - texstate->textures[uinfo->data[i]]->u.tex.first_level); - break; - - case QUNIFORM_TEXTURE_BORDER_COLOR: - /* XXX */ - break; - - case QUNIFORM_SPILL_OFFSET: - cl_aligned_reloc(&job->indirect, &uniforms, - vc5->prog.spill_bo, 0); - break; - - case QUNIFORM_SPILL_SIZE_PER_THREAD: - cl_aligned_u32(&uniforms, - vc5->prog.spill_size_per_thread); - break; - - default: - assert(quniform_contents_is_texture_p0(uinfo->contents[i])); - - 
write_texture_p0(job, &uniforms, texstate, - uinfo->contents[i] - - QUNIFORM_TEXTURE_CONFIG_P0_0, - uinfo->data[i]); - break; - - } -#if 0 - uint32_t written_val = *((uint32_t *)uniforms - 1); - fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", - shader, i, __gen_address_offset(&uniform_stream) + i * 4, - written_val, uif(written_val)); -#endif - } - - cl_end(&job->indirect, uniforms); - - vc5_bo_unreference(&ubo); - - return uniform_stream; -} - -void -vc5_set_shader_uniform_dirty_flags(struct vc5_compiled_shader *shader) -{ - uint32_t dirty = 0; - - for (int i = 0; i < shader->prog_data.base->uniforms.count; i++) { - switch (shader->prog_data.base->uniforms.contents[i]) { - case QUNIFORM_CONSTANT: - break; - case QUNIFORM_UNIFORM: - case QUNIFORM_UBO_ADDR: - dirty |= VC5_DIRTY_CONSTBUF; - break; - - case QUNIFORM_VIEWPORT_X_SCALE: - case QUNIFORM_VIEWPORT_Y_SCALE: - case QUNIFORM_VIEWPORT_Z_OFFSET: - case QUNIFORM_VIEWPORT_Z_SCALE: - dirty |= VC5_DIRTY_VIEWPORT; - break; - - case QUNIFORM_USER_CLIP_PLANE: - dirty |= VC5_DIRTY_CLIP; - break; - - case QUNIFORM_TMU_CONFIG_P0: - case QUNIFORM_TMU_CONFIG_P1: - case QUNIFORM_TEXTURE_CONFIG_P1: - case QUNIFORM_TEXTURE_BORDER_COLOR: - case QUNIFORM_TEXTURE_FIRST_LEVEL: - case QUNIFORM_TEXRECT_SCALE_X: - case QUNIFORM_TEXRECT_SCALE_Y: - case QUNIFORM_TEXTURE_WIDTH: - case QUNIFORM_TEXTURE_HEIGHT: - case QUNIFORM_TEXTURE_DEPTH: - case QUNIFORM_TEXTURE_ARRAY_SIZE: - case QUNIFORM_TEXTURE_LEVELS: - case QUNIFORM_SPILL_OFFSET: - case QUNIFORM_SPILL_SIZE_PER_THREAD: - /* We could flag this on just the stage we're - * compiling for, but it's not passed in. 
- */ - dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; - break; - - case QUNIFORM_STENCIL: - case QUNIFORM_ALPHA_REF: - dirty |= VC5_DIRTY_ZSA; - break; - - case QUNIFORM_SAMPLE_MASK: - dirty |= VC5_DIRTY_SAMPLE_MASK; - break; - - default: - assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); - dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; - break; - } - } - - shader->uniform_dirty_bits = dirty; -} diff --git a/src/gallium/meson.build b/src/gallium/meson.build index a93bf32139b..561af9d339c 100644 --- a/src/gallium/meson.build +++ b/src/gallium/meson.build @@ -95,8 +95,8 @@ else driver_pl111 = declare_dependency() endif if with_gallium_v3d - subdir('winsys/vc5/drm') - subdir('drivers/vc5') + subdir('winsys/v3d/drm') + subdir('drivers/v3d') else driver_v3d = declare_dependency() endif diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index a0778b64e58..a857b5129fd 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -76,8 +76,8 @@ include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc include $(top_srcdir)/src/gallium/drivers/tegra/Automake.inc +include $(top_srcdir)/src/gallium/drivers/v3d/Automake.inc include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc -include $(top_srcdir)/src/gallium/drivers/vc5/Automake.inc include $(top_srcdir)/src/gallium/drivers/pl111/Automake.inc include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc diff --git a/src/gallium/winsys/v3d/drm/Android.mk b/src/gallium/winsys/v3d/drm/Android.mk new file mode 100644 index 00000000000..4cdd969c268 --- /dev/null +++ b/src/gallium/winsys/v3d/drm/Android.mk @@ -0,0 +1,33 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, 
modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_v3d + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/v3d/drm/Makefile.am b/src/gallium/winsys/v3d/drm/Makefile.am new file mode 100644 index 00000000000..ac2ef23bb64 --- /dev/null +++ b/src/gallium/winsys/v3d/drm/Makefile.am @@ -0,0 +1,33 @@ +# Copyright © 2014 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + $(GALLIUM_WINSYS_CFLAGS) + +noinst_LTLIBRARIES = libv3ddrm.la + +libv3ddrm_la_SOURCES = $(C_SOURCES) + +EXTRA_DIST = meson.build diff --git a/src/gallium/winsys/v3d/drm/Makefile.sources b/src/gallium/winsys/v3d/drm/Makefile.sources new file mode 100644 index 00000000000..1fdeefbcbee --- /dev/null +++ b/src/gallium/winsys/v3d/drm/Makefile.sources @@ -0,0 +1,3 @@ +C_SOURCES := \ + v3d_drm_public.h \ + v3d_drm_winsys.c diff --git a/src/gallium/winsys/v3d/drm/meson.build b/src/gallium/winsys/v3d/drm/meson.build new file mode 100644 index 00000000000..e5d7b12d1df --- /dev/null +++ b/src/gallium/winsys/v3d/drm/meson.build @@ -0,0 +1,29 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +libv3dwinsys = static_library( + 'v3dwinsys', + files('v3d_drm_winsys.c'), + include_directories : [ + inc_src, inc_include, + inc_gallium, inc_gallium_aux, inc_gallium_drivers, + ], + c_args : [c_vis_args], +) diff --git a/src/gallium/winsys/v3d/drm/v3d_drm_public.h b/src/gallium/winsys/v3d/drm/v3d_drm_public.h new file mode 100644 index 00000000000..46aed9d4e18 --- /dev/null +++ b/src/gallium/winsys/v3d/drm/v3d_drm_public.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __VC5_DRM_PUBLIC_H__ +#define __VC5_DRM_PUBLIC_H__ + +struct pipe_screen; + +struct pipe_screen *v3d_drm_screen_create(int drmFD); + +#endif /* __VC5_DRM_PUBLIC_H__ */ diff --git a/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c b/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c new file mode 100644 index 00000000000..63b7a5717f5 --- /dev/null +++ b/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c @@ -0,0 +1,35 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <unistd.h> +#include <fcntl.h> + +#include "v3d_drm_public.h" + +#include "v3d/v3d_screen.h" + +struct pipe_screen * +v3d_drm_screen_create(int fd) +{ + return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); +} diff --git a/src/gallium/winsys/vc5/drm/Android.mk b/src/gallium/winsys/vc5/drm/Android.mk deleted file mode 100644 index 4cdd969c268..00000000000 --- a/src/gallium/winsys/vc5/drm/Android.mk +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2014 Emil Velikov -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- -LOCAL_PATH := $(call my-dir) - -# get C_SOURCES -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(C_SOURCES) - -LOCAL_MODULE := libmesa_winsys_v3d - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/winsys/vc5/drm/Makefile.am b/src/gallium/winsys/vc5/drm/Makefile.am deleted file mode 100644 index ac2ef23bb64..00000000000 --- a/src/gallium/winsys/vc5/drm/Makefile.am +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright © 2014 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - $(GALLIUM_WINSYS_CFLAGS) - -noinst_LTLIBRARIES = libv3ddrm.la - -libv3ddrm_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = meson.build diff --git a/src/gallium/winsys/vc5/drm/Makefile.sources b/src/gallium/winsys/vc5/drm/Makefile.sources deleted file mode 100644 index ea7566f8dc6..00000000000 --- a/src/gallium/winsys/vc5/drm/Makefile.sources +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := \ - vc5_drm_public.h \ - vc5_drm_winsys.c diff --git a/src/gallium/winsys/vc5/drm/meson.build b/src/gallium/winsys/vc5/drm/meson.build deleted file mode 100644 index 401aff8f47d..00000000000 --- a/src/gallium/winsys/vc5/drm/meson.build +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright © 2017 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -libv3dwinsys = static_library( - 'v3dwinsys', - files('vc5_drm_winsys.c'), - include_directories : [ - inc_src, inc_include, - inc_gallium, inc_gallium_aux, inc_gallium_drivers, - ], - c_args : [c_vis_args], -) diff --git a/src/gallium/winsys/vc5/drm/vc5_drm_public.h b/src/gallium/winsys/vc5/drm/vc5_drm_public.h deleted file mode 100644 index 46aed9d4e18..00000000000 --- a/src/gallium/winsys/vc5/drm/vc5_drm_public.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef __VC5_DRM_PUBLIC_H__ -#define __VC5_DRM_PUBLIC_H__ - -struct pipe_screen; - -struct pipe_screen *v3d_drm_screen_create(int drmFD); - -#endif /* __VC5_DRM_PUBLIC_H__ */ diff --git a/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c b/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c deleted file mode 100644 index 9849ef4e006..00000000000 --- a/src/gallium/winsys/vc5/drm/vc5_drm_winsys.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright © 2014 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#include "vc5_drm_public.h" - -#include "vc5/vc5_screen.h" - -struct pipe_screen * -v3d_drm_screen_create(int fd) -{ - return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); -}