From 757071ca7cdda12d2974614f9a9d02d5a834f38c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Fredrik=20H=C3=B6glund?=
Date: Fri, 8 Jan 2016 16:31:14 -0500
Subject: [PATCH] st/mesa: Accelerate PBO uploads
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Create a PIPE_BUFFER sampler view on the pixel-unpack buffer, and draw
the image on the texture with a fragment shader that maps fragment
coordinates to buffer coordinates.

Modifications by Nicolai Hähnle:
- various cleanups and fixes (e.g. error handling, corner cases)
- split try_pbo_upload into two functions, which will allow code to be
  shared with compressed texture uploads
- modify the source format selection to only test for support against
  the PIPE_BUFFER target

v2:
- update handling of TGSI_SEMANTIC_POSITION for recent changes in master
- MaxTextureBufferSize is number of texels, not bytes (Ilia Mirkin)
- only enable when integers are supported (Marek Olšák)
- try harder to hit the TextureBufferOffsetAlignment
- remove unnecessary MOV from the fragment shader

v3:
- upload vertices and constants before touching CSO state and fail
  cleanly when either upload fails
- drop the local vertex and constant buffer references after binding
- handle ureg_create failure
- compare format swizzles against UTIL_FORMAT_SWIZZLE_*
- no declaration after statement in try_pbo_upload

Reviewed-by: Edward O'Callaghan
---
 src/mesa/state_tracker/st_cb_texture.c | 946 ++++++++++++++++++++++++-
 src/mesa/state_tracker/st_cb_texture.h |   5 +
 src/mesa/state_tracker/st_context.c    |   2 +
 src/mesa/state_tracker/st_context.h    |  10 +
 4 files changed, 954 insertions(+), 9 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 0e3a030fd6f..9d90d031f88 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -60,6 +60,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
 #include "pipe/p_shader_tokens.h"
 #include "util/u_tile.h"
 #include "util/u_format.h"
@@ -67,6 +68,9 @@
 #include "util/u_sampler.h"
 #include "util/u_math.h"
 #include "util/u_box.h"
+#include "util/u_simple_shaders.h"
+#include "cso_cache/cso_context.h"
+#include "tgsi/tgsi_ureg.h"
 
 #define DBG if (0) printf
 
@@ -686,6 +690,921 @@ st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
    }
 }
 
+/**
+ * Decide whether PBO uploads can be accelerated on this driver and, if so,
+ * create the vertex shader and the fixed blend and rasterizer state.
+ */
+void
+st_init_pbo_upload(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   st->pbo_upload.enabled =
+      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
+      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
+      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
+   if (!st->pbo_upload.enabled)
+      return;
+
+   st->pbo_upload.rgba_only =
+      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
+
+   /* Create the vertex shader */
+   {
+      unsigned semantic_names[] = { TGSI_SEMANTIC_POSITION };
+      unsigned semantic_indexes[] = { 0 };
+
+      st->pbo_upload.vs = util_make_vertex_passthrough_shader(pipe, 1,
+                                                              semantic_names,
+                                                              semantic_indexes,
+                                                              FALSE);
+   }
+
+   /* Blend state */
+   memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state));
+   st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+   /* Rasterizer state */
+   memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state));
+   st->pbo_upload.raster.half_pixel_center = 1;
+}
+
+/**
+ * Delete the shaders created for accelerated PBO uploads.
+ */
+void
+st_destroy_pbo_upload(struct st_context *st)
+{
+   if (st->pbo_upload.fs) {
+      cso_delete_fragment_shader(st->cso_context, st->pbo_upload.fs);
+      st->pbo_upload.fs = NULL;
+   }
+
+   if (st->pbo_upload.vs) {
+      cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs);
+      st->pbo_upload.vs = NULL;
+   }
+}
+
+/**
+ * Converts format to a format with the same components, types
+ * and sizes, but with the components in RGBA order.
+ */
+static enum pipe_format
+unswizzle_format(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
+      return PIPE_FORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+      return PIPE_FORMAT_R10G10B10A2_UNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+      return PIPE_FORMAT_R10G10B10A2_SNORM;
+
+   case PIPE_FORMAT_B10G10R10A2_UINT:
+      return PIPE_FORMAT_R10G10B10A2_UINT;
+
+   default:
+      return format;
+   }
+}
+
+/**
+ * Converts PIPE_FORMAT_A* to PIPE_FORMAT_R*.
+ */
+static enum pipe_format
+alpha_to_red(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_A8_UNORM:
+      return PIPE_FORMAT_R8_UNORM;
+   case PIPE_FORMAT_A8_SNORM:
+      return PIPE_FORMAT_R8_SNORM;
+   case PIPE_FORMAT_A8_UINT:
+      return PIPE_FORMAT_R8_UINT;
+   case PIPE_FORMAT_A8_SINT:
+      return PIPE_FORMAT_R8_SINT;
+
+   case PIPE_FORMAT_A16_UNORM:
+      return PIPE_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_A16_SNORM:
+      return PIPE_FORMAT_R16_SNORM;
+   case PIPE_FORMAT_A16_UINT:
+      return PIPE_FORMAT_R16_UINT;
+   case PIPE_FORMAT_A16_SINT:
+      return PIPE_FORMAT_R16_SINT;
+   case PIPE_FORMAT_A16_FLOAT:
+      return PIPE_FORMAT_R16_FLOAT;
+
+   case PIPE_FORMAT_A32_UINT:
+      return PIPE_FORMAT_R32_UINT;
+   case PIPE_FORMAT_A32_SINT:
+      return PIPE_FORMAT_R32_SINT;
+   case PIPE_FORMAT_A32_FLOAT:
+      return PIPE_FORMAT_R32_FLOAT;
+
+   default:
+      return format;
+   }
+}
+
+/**
+ * Converts PIPE_FORMAT_R*A* to PIPE_FORMAT_R*G*.
+ */
+static enum pipe_format
+red_alpha_to_red_green(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_R8A8_UNORM:
+      return PIPE_FORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8A8_SNORM:
+      return PIPE_FORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8A8_UINT:
+      return PIPE_FORMAT_R8G8_UINT;
+   case PIPE_FORMAT_R8A8_SINT:
+      return PIPE_FORMAT_R8G8_SINT;
+
+   case PIPE_FORMAT_R16A16_UNORM:
+      return PIPE_FORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16A16_SNORM:
+      return PIPE_FORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16A16_UINT:
+      return PIPE_FORMAT_R16G16_UINT;
+   case PIPE_FORMAT_R16A16_SINT:
+      return PIPE_FORMAT_R16G16_SINT;
+   case PIPE_FORMAT_R16A16_FLOAT:
+      return PIPE_FORMAT_R16G16_FLOAT;
+
+   case PIPE_FORMAT_R32A32_UINT:
+      return PIPE_FORMAT_R32G32_UINT;
+   case PIPE_FORMAT_R32A32_SINT:
+      return PIPE_FORMAT_R32G32_SINT;
+   case PIPE_FORMAT_R32A32_FLOAT:
+      return PIPE_FORMAT_R32G32_FLOAT;
+
+   default:
+       return format;
+   }
+}
+
+/**
+ * Converts PIPE_FORMAT_L*A* to PIPE_FORMAT_R*G*.
+ */
+static enum pipe_format
+luminance_alpha_to_red_green(enum pipe_format format)
+{
+   switch (format)
+   {
+   case PIPE_FORMAT_L8A8_UNORM:
+      return PIPE_FORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_L8A8_SNORM:
+      return PIPE_FORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_L8A8_UINT:
+      return PIPE_FORMAT_R8G8_UINT;
+   case PIPE_FORMAT_L8A8_SINT:
+      return PIPE_FORMAT_R8G8_SINT;
+
+   case PIPE_FORMAT_L16A16_UNORM:
+      return PIPE_FORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_L16A16_SNORM:
+      return PIPE_FORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_L16A16_UINT:
+      return PIPE_FORMAT_R16G16_UINT;
+   case PIPE_FORMAT_L16A16_SINT:
+      return PIPE_FORMAT_R16G16_SINT;
+   case PIPE_FORMAT_L16A16_FLOAT:
+      return PIPE_FORMAT_R16G16_FLOAT;
+
+   case PIPE_FORMAT_L32A32_UINT:
+      return PIPE_FORMAT_R32G32_UINT;
+   case PIPE_FORMAT_L32A32_SINT:
+      return PIPE_FORMAT_R32G32_SINT;
+   case PIPE_FORMAT_L32A32_FLOAT:
+      return PIPE_FORMAT_R32G32_FLOAT;
+
+   default:
+       return format;
+   }
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_A* format, and false otherwise.
+ */
+static bool
+format_is_alpha(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->nr_channels == 1 &&
+       desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+      return true;
+
+   return false;
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_R* format, and false otherwise.
+ */
+static bool
+format_is_red(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->nr_channels == 1 &&
+       desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+      return true;
+
+   return false;
+}
+
+
+/**
+ * Returns true if format is a PIPE_FORMAT_L* format, and false otherwise.
+ */
+static bool
+format_is_luminance(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->nr_channels == 1 &&
+       desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+      return true;
+
+   return false;
+}
+
+/**
+ * Returns true if format is a PIPE_FORMAT_R*A* format, and false otherwise.
+ */
+static bool
+format_is_red_alpha(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if (desc->nr_channels == 2 &&
+       desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+       desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+       desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y)
+      return true;
+
+   return false;
+}
+
+/**
+ * Returns false if each channel of format is either in its RGBA position
+ * or a constant (0 for RGB, 1 for A), and true otherwise.
+ */
+static bool
+format_is_swizzled_rgba(enum pipe_format format)
+{
+    const struct util_format_description *desc = util_format_description(format);
+
+    if ((desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X || desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0) &&
+        (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y || desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) &&
+        (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z || desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) &&
+        (desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_W || desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1))
+       return false;
+
+    return true;
+}
+
+struct format_table
+{
+   unsigned char swizzle[4];
+   enum pipe_format format;
+};
+
+static const struct format_table table_8888_unorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+   { { 3, 0, 1, 2 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+   { { 3, 2, 1, 0 }, PIPE_FORMAT_A8B8G8R8_UNORM }
+};
+
+static const struct format_table table_1010102_unorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UNORM }
+};
+
+static const struct format_table table_1010102_snorm[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_SNORM },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_SNORM }
+};
+
+static const struct format_table table_1010102_uint[] = {
+   { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UINT },
+   { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UINT }
+};
+
+/**
+ * Returns the format of the same family as format whose channel order is
+ * given by swizzle, or PIPE_FORMAT_NONE if there is no such format.
+ */
+static enum pipe_format
+swizzle_format(enum pipe_format format, const int * const swizzle)
+{
+   unsigned i;
+
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
+      for (i = 0; i < ARRAY_SIZE(table_8888_unorm); i++) {
+         if (swizzle[0] == table_8888_unorm[i].swizzle[0] &&
+             swizzle[1] == table_8888_unorm[i].swizzle[1] &&
+             swizzle[2] == table_8888_unorm[i].swizzle[2] &&
+             swizzle[3] == table_8888_unorm[i].swizzle[3])
+            return table_8888_unorm[i].format;
+      }
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_UNORM:
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+      for (i = 0; i < ARRAY_SIZE(table_1010102_unorm); i++) {
+         if (swizzle[0] == table_1010102_unorm[i].swizzle[0] &&
+             swizzle[1] == table_1010102_unorm[i].swizzle[1] &&
+             swizzle[2] == table_1010102_unorm[i].swizzle[2] &&
+             swizzle[3] == table_1010102_unorm[i].swizzle[3])
+            return table_1010102_unorm[i].format;
+      }
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_SNORM:
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+      for (i = 0; i < ARRAY_SIZE(table_1010102_snorm); i++) {
+         if (swizzle[0] == table_1010102_snorm[i].swizzle[0] &&
+             swizzle[1] == table_1010102_snorm[i].swizzle[1] &&
+             swizzle[2] == table_1010102_snorm[i].swizzle[2] &&
+             swizzle[3] == table_1010102_snorm[i].swizzle[3])
+            return table_1010102_snorm[i].format;
+      }
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_UINT:
+   case PIPE_FORMAT_B10G10R10A2_UINT:
+      for (i = 0; i < ARRAY_SIZE(table_1010102_uint); i++) {
+         if (swizzle[0] == table_1010102_uint[i].swizzle[0] &&
+             swizzle[1] == table_1010102_uint[i].swizzle[1] &&
+             swizzle[2] == table_1010102_uint[i].swizzle[2] &&
+             swizzle[3] == table_1010102_uint[i].swizzle[3])
+            return table_1010102_uint[i].format;
+      }
+      break;
+
+   default:
+      break;
+   }
+
+   return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Replaces *src_format by a format with the same bit layout whose components
+ * are in R, RG or RGBA order and adjusts *dst_format to compensate.
+ * Returns false if the pair of formats cannot be remapped this way.
+ */
+static bool
+reinterpret_formats(enum pipe_format *src_format, enum pipe_format *dst_format)
+{
+   enum pipe_format src = *src_format;
+   enum pipe_format dst = *dst_format;
+
+   /* Note: dst_format has already been transformed from luminance/intensity
+    *       to red when this function is called.  The source format will never
+    *       be an intensity format, because GL_INTENSITY is not a legal value
+    *       for the format parameter in glTex(Sub)Image(). */
+
+   if (format_is_alpha(src)) {
+      if (!format_is_alpha(dst))
+         return false;
+
+      src = alpha_to_red(src);
+      dst = alpha_to_red(dst);
+   } else if (format_is_luminance(src)) {
+      if (!format_is_red(dst) && !format_is_red_alpha(dst))
+         return false;
+
+      src = util_format_luminance_to_red(src);
+   } else if (util_format_is_luminance_alpha(src)) {
+      src = luminance_alpha_to_red_green(src);
+
+      if (format_is_red_alpha(dst)) {
+         dst = red_alpha_to_red_green(dst);
+      } else if (!format_is_red(dst))
+         return false;
+   } else if (format_is_swizzled_rgba(src)) {
+      const struct util_format_description *src_desc = util_format_description(src);
+      const struct util_format_description *dst_desc = util_format_description(dst);
+      int swizzle[4];
+      unsigned i;
+
+      /* Make sure the format is an RGBA and not an RGBX format */
+      if (src_desc->nr_channels != 4 || src_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+         return false;
+
+      if (dst_desc->nr_channels != 4 || dst_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+         return false;
+
+      for (i = 0; i < 4; i++)
+         swizzle[i] = dst_desc->swizzle[src_desc->swizzle[i]];
+
+      dst = swizzle_format(dst, swizzle);
+      if (dst == PIPE_FORMAT_NONE)
+         return false;
+
+      src = unswizzle_format(src);
+   }
+
+   *src_format = src;
+   *dst_format = dst;
+   return true;
+}
+
+/**
+ * Creates the fragment shader that fetches the texel for each fragment
+ * from the buffer sampler view.  Returns NULL on failure.
+ */
+static void *
+create_pbo_upload_shader(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct ureg_program *ureg;
+   struct ureg_dst out;
+   struct ureg_src sampler;
+   struct ureg_src pos;
+   struct ureg_src const0;
+   struct ureg_dst temp0;
+
+   ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!ureg)
+      return NULL;
+
+   out     = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+   sampler = ureg_DECL_sampler(ureg, 0);
+   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
+      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+   } else {
+      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                               TGSI_INTERPOLATE_LINEAR);
+   }
+   const0  = ureg_DECL_constant(ureg, 0);
+   temp0   = ureg_DECL_temporary(ureg);
+
+   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, 0 ] */
+
+   /* temp0.xy = f2i(pos.xy) */
+   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+                  ureg_swizzle(pos,
+                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* temp0.xy = temp0.xy + const0.xy */
+   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
+                   ureg_swizzle(ureg_src(temp0),
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+                   ureg_swizzle(const0,
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* temp0.x = const0.z * temp0.y + temp0.x */
+   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
+                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
+                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
+                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
+
+   /* out = txf(sampler, temp0.x) */
+   ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER,
+                  ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X),
+                  sampler);
+
+   ureg_release_temporary(ureg, temp0);
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+/**
+ * Draws a quad covering the upload rectangle of surface with a shader that
+ * reads the texels from buffer.  buf_offset and stride are in texels.
+ * Returns false, without having changed any state, if the upload cannot
+ * be done this way.
+ */
+static bool
+try_pbo_upload_common(struct gl_context *ctx,
+                      struct pipe_surface *surface,
+                      int xoffset, int yoffset,
+                      unsigned upload_width, unsigned upload_height,
+                      struct pipe_resource *buffer,
+                      enum pipe_format src_format,
+                      intptr_t buf_offset,
+                      unsigned bytes_per_pixel,
+                      unsigned stride,
+                      unsigned image_height)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_sampler_view *sampler_view = NULL;
+   struct pipe_vertex_buffer vbo;
+   struct pipe_constant_buffer cb;
+   struct {
+      int32_t xoffset;
+      int32_t yoffset;
+      int32_t stride;
+      int32_t pad;
+   } constants;
+   unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1;
+   unsigned skip_pixels = 0;
+
+   /* Check alignment. */
+   {
+      unsigned ofs = (buf_offset * bytes_per_pixel) % ctx->Const.TextureBufferOffsetAlignment;
+      if (ofs != 0) {
+         if (ofs % bytes_per_pixel != 0)
+            return false;
+
+         skip_pixels = ofs / bytes_per_pixel;
+         buf_offset -= skip_pixels;
+      }
+   }
+
+   /* Create the shaders */
+   if (!st->pbo_upload.fs) {
+      st->pbo_upload.fs = create_pbo_upload_shader(st);
+      if (!st->pbo_upload.fs)
+         return false;
+   }
+
+   /* Set up the sampler_view */
+   {
+      unsigned first_element = buf_offset;
+      unsigned last_element = buf_offset + skip_pixels + upload_width - 1 +
+         (upload_height - 1 + (depth - 1) * image_height) * stride;
+      struct pipe_sampler_view templ;
+
+      /* This should be ensured by Mesa before calling our callbacks */
+      assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
+
+      if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
+         return false;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.format = src_format;
+      templ.u.buf.first_element = first_element;
+      templ.u.buf.last_element = last_element;
+      templ.swizzle_r = PIPE_SWIZZLE_RED;
+      templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+      templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+      templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+      sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
+      if (sampler_view == NULL)
+         return false;
+   }
+
+   /* Upload vertices.  Everything that can fail is allocated before any
+    * CSO state is changed, so a failure needs no state restoration. */
+   {
+      float x0 = (float) xoffset / surface->width * 2.0f - 1.0f;
+      float y0 = (float) yoffset / surface->height * 2.0f - 1.0f;
+      float x1 = (float) (xoffset + upload_width) / surface->width * 2.0f - 1.0f;
+      float y1 = (float) (yoffset + upload_height) / surface->height * 2.0f - 1.0f;
+
+      float *verts = NULL;
+
+      vbo.user_buffer = NULL;
+      vbo.buffer = NULL;
+      vbo.stride = 2 * sizeof(float);
+
+      u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
+                     &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
+      if (!verts) {
+         pipe_sampler_view_reference(&sampler_view, NULL);
+         return false;
+      }
+
+      verts[0] = x0;
+      verts[1] = y0;
+      verts[2] = x0;
+      verts[3] = y1;
+      verts[4] = x1;
+      verts[5] = y0;
+      verts[6] = x1;
+      verts[7] = y1;
+
+      u_upload_unmap(st->uploader);
+   }
+
+   /* Upload constants */
+   {
+      constants.xoffset = -xoffset + skip_pixels;
+      constants.yoffset = -yoffset;
+      constants.stride = stride;
+      constants.pad = 0;
+
+      if (st->constbuf_uploader) {
+         cb.buffer = NULL;
+         cb.user_buffer = NULL;
+         u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
+                       st->ctx->Const.UniformBufferOffsetAlignment,
+                       &constants, &cb.buffer_offset, &cb.buffer);
+         u_upload_unmap(st->constbuf_uploader);
+         if (!cb.buffer) {
+            pipe_resource_reference(&vbo.buffer, NULL);
+            pipe_sampler_view_reference(&sampler_view, NULL);
+            return false;
+         }
+      } else {
+         cb.buffer = NULL;
+         cb.user_buffer = &constants;
+         cb.buffer_offset = 0;
+      }
+      cb.buffer_size = sizeof(constants);
+   }
+
+   /* Begin setting state. This is the point of no return. */
+   cso_save_fragment_sampler_views(st->cso_context);
+   cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
+                         &sampler_view);
+
+   /* Framebuffer_state */
+   {
+      struct pipe_framebuffer_state fb;
+      memset(&fb, 0, sizeof(fb));
+      fb.width = surface->width;
+      fb.height = surface->height;
+      fb.nr_cbufs = 1;
+      pipe_surface_reference(&fb.cbufs[0], surface);
+
+      cso_save_framebuffer(st->cso_context);
+      cso_set_framebuffer(st->cso_context, &fb);
+
+      pipe_surface_reference(&fb.cbufs[0], NULL);
+   }
+
+   /* Viewport state */
+   {
+      struct pipe_viewport_state vp;
+      vp.scale[0] = 0.5f * surface->width;
+      vp.scale[1] = 0.5f * surface->height;
+      vp.scale[2] = 1.0f;
+      vp.translate[0] = 0.5f * surface->width;
+      vp.translate[1] = 0.5f * surface->height;
+      vp.translate[2] = 0.0f;
+
+      cso_save_viewport(st->cso_context);
+      cso_set_viewport(st->cso_context, &vp);
+   }
+
+   /* Blend state */
+   cso_save_blend(st->cso_context);
+   cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+
+   /* Rasterizer state */
+   cso_save_rasterizer(st->cso_context);
+   cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+
+   /* Vertex elements and vertex buffer */
+   {
+      struct pipe_vertex_element velem;
+
+      velem.src_offset = 0;
+      velem.instance_divisor = 0;
+      velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+      cso_save_vertex_elements(st->cso_context);
+      cso_set_vertex_elements(st->cso_context, 1, &velem);
+
+      cso_save_aux_vertex_buffer_slot(st->cso_context);
+      cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
+                             1, &vbo);
+
+      /* Binding does not consume our reference. */
+      pipe_resource_reference(&vbo.buffer, NULL);
+   }
+
+   /* Constant buffer */
+   cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+   cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+   /* Binding does not consume our reference (NULL for user buffers). */
+   pipe_resource_reference(&cb.buffer, NULL);
+
+   /* Set up the shaders */
+   cso_save_vertex_shader(st->cso_context);
+   cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+
+   cso_save_geometry_shader(st->cso_context);
+   cso_set_geometry_shader_handle(st->cso_context, NULL);
+
+   cso_save_tessctrl_shader(st->cso_context);
+   cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+
+   cso_save_tesseval_shader(st->cso_context);
+   cso_set_tesseval_shader_handle(st->cso_context, NULL);
+
+   cso_save_fragment_shader(st->cso_context);
+   cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+
+   /* Disable stream output */
+   cso_save_stream_outputs(st->cso_context);
+   cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+
+   cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+
+   cso_restore_fragment_sampler_views(st->cso_context);
+   cso_restore_framebuffer(st->cso_context);
+   cso_restore_viewport(st->cso_context);
+   cso_restore_blend(st->cso_context);
+   cso_restore_rasterizer(st->cso_context);
+   cso_restore_vertex_elements(st->cso_context);
+   cso_restore_aux_vertex_buffer_slot(st->cso_context);
+   cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+   cso_restore_vertex_shader(st->cso_context);
+   cso_restore_geometry_shader(st->cso_context);
+   cso_restore_tessctrl_shader(st->cso_context);
+   cso_restore_tesseval_shader(st->cso_context);
+   cso_restore_fragment_shader(st->cso_context);
+   cso_restore_stream_outputs(st->cso_context);
+
+   pipe_sampler_view_reference(&sampler_view, NULL);
+
+   return true;
+}
+
+/**
+ * Tries to upload the image from the pixel unpack buffer to the texture
+ * with a draw call.  Returns false if the caller has to use another
+ * upload path.
+ */
+static bool
+try_pbo_upload(struct gl_context *ctx, GLuint dims,
+               struct gl_texture_image *texImage,
+               GLenum format, GLenum type,
+               enum pipe_format dst_format,
+               GLint xoffset, GLint yoffset, GLint zoffset,
+               GLint width, GLint height, GLint depth,
+               const void *pixels,
+               const struct gl_pixelstore_attrib *unpack)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+   struct pipe_resource *texture = stImage->pt;
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_surface *surface = NULL;
+   enum pipe_format src_format;
+   const struct util_format_description *desc;
+   GLenum gl_target = texImage->TexObject->Target;
+   intptr_t buf_offset;
+   unsigned bytes_per_pixel;
+   unsigned stride, image_height;
+   bool success;
+
+   if (!st->pbo_upload.enabled)
+      return false;
+
+   /* From now on, we need the gallium representation of dimensions. */
+   if (gl_target == GL_TEXTURE_1D_ARRAY) {
+      depth = height;
+      height = 1;
+      zoffset = yoffset;
+      yoffset = 0;
+      image_height = 1;
+   } else {
+      image_height = unpack->ImageHeight > 0 ? unpack->ImageHeight : height;
+   }
+
+   /* XXX We only support updating a single layer */
+   if (depth != 1)
+      return false;
+
+   /* Choose the source format. Initially, we do so without checking driver
+    * support at all because of the remapping we later perform and because
+    * at least the Radeon driver actually supports some formats for texture
+    * buffers which it doesn't support for regular textures. */
+   src_format = st_choose_matching_format(st, 0, format, type, unpack->SwapBytes);
+   if (!src_format) {
+      return false;
+   }
+
+   src_format = util_format_linear(src_format);
+   desc = util_format_description(src_format);
+
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+      return false;
+
+   if (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB)
+      return false;
+
+   if (st->pbo_upload.rgba_only) {
+      enum pipe_format orig_dst_format = dst_format;
+
+      if (!reinterpret_formats(&src_format, &dst_format)) {
+         return false;
+      }
+
+      if (dst_format != orig_dst_format &&
+          !screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D, 0,
+                                       PIPE_BIND_RENDER_TARGET)) {
+         return false;
+      }
+   }
+
+   if (!src_format ||
+       !screen->is_format_supported(screen, src_format, PIPE_BUFFER, 0,
+                                    PIPE_BIND_SAMPLER_VIEW)) {
+      return false;
+   }
+
+   /* Check if the offset satisfies the alignment requirements */
+   buf_offset = (intptr_t) pixels;
+   bytes_per_pixel = desc->block.bits / 8;
+
+   if (buf_offset % bytes_per_pixel) {
+      return false;
+   }
+
+   /* Convert to texels */
+   buf_offset = buf_offset / bytes_per_pixel;
+
+   /* Compute the stride, taking unpack->Alignment into account */
+   {
+       unsigned pixels_per_row = unpack->RowLength > 0 ?
+                           unpack->RowLength : width;
+       unsigned bytes_per_row = pixels_per_row * bytes_per_pixel;
+       unsigned remainder = bytes_per_row % unpack->Alignment;
+       unsigned offset_rows;
+
+       if (remainder > 0)
+          bytes_per_row += (unpack->Alignment - remainder);
+
+       if (bytes_per_row % bytes_per_pixel) {
+          return false;
+       }
+
+       stride = bytes_per_row / bytes_per_pixel;
+
+       offset_rows = unpack->SkipRows;
+       if (dims == 3)
+          offset_rows += image_height * unpack->SkipImages;
+
+       buf_offset += unpack->SkipPixels + stride * offset_rows;
+   }
+
+   /* Set up the surface */
+   {
+      unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+      unsigned max_layer = util_max_layer(texture, level);
+      struct pipe_surface templ;
+
+      zoffset += texImage->Face + texImage->TexObject->MinLayer;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.format = dst_format;
+      templ.u.tex.level = level;
+      templ.u.tex.first_layer = MIN2(zoffset, max_layer);
+      templ.u.tex.last_layer = MIN2(zoffset + depth - 1, max_layer);
+
+      surface = pipe->create_surface(pipe, texture, &templ);
+      if (!surface)
+         return false;
+   }
+
+   success = try_pbo_upload_common(ctx,  surface,
+                                   xoffset, yoffset, width, height,
+                                   st_buffer_object(unpack->BufferObj)->buffer,
+                                   src_format,
+                                   buf_offset,
+                                   bytes_per_pixel, stride, image_height);
+
+   pipe_surface_reference(&surface, NULL);
+
+   return success;
+}
 
 static void
 st_TexSubImage(struct gl_context *ctx, GLuint dims,
@@ -735,21 +1654,15 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
       goto fallback;
    }
 
-   /* See if the texture format already matches the format and type,
-    * in which case the memcpy-based fast path will likely be used and
-    * we don't have to blit. */
-   if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
-                                            type, unpack->SwapBytes, NULL)) {
-      goto fallback;
-   }
+   /* See if the destination format is supported. */
 
    if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
       bind = PIPE_BIND_DEPTH_STENCIL;
    else
       bind = PIPE_BIND_RENDER_TARGET;
 
-   /* See if the destination format is supported.
-    * For luminance and intensity, only the red channel is stored there. */
+   /* For luminance and intensity, only the red channel is stored
+    * in the destination. */
    dst_format = util_format_linear(dst->format);
    dst_format = util_format_luminance_to_red(dst_format);
    dst_format = util_format_intensity_to_red(dst_format);
@@ -760,6 +1673,21 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
       goto fallback;
    }
 
+   if (_mesa_is_bufferobj(unpack->BufferObj)) {
+      if (try_pbo_upload(ctx, dims, texImage, format, type, dst_format,
+                         xoffset, yoffset, zoffset,
+                         width, height, depth, pixels, unpack))
+         return;
+   }
+
+   /* See if the texture format already matches the format and type,
+    * in which case the memcpy-based fast path will likely be used and
+    * we don't have to blit. */
+   if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
+                                            type, unpack->SwapBytes, NULL)) {
+      goto fallback;
+   }
+
    /* Choose the source format. */
    src_format = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
                                           format, type, unpack->SwapBytes);
diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
index 1b685298c5f..55c86c401e2 100644
--- a/src/mesa/state_tracker/st_cb_texture.h
+++ b/src/mesa/state_tracker/st_cb_texture.h
@@ -53,5 +53,10 @@ st_finalize_texture(struct gl_context *ctx,
 extern void
 st_init_texture_functions(struct dd_function_table *functions);
 
+extern void
+st_init_pbo_upload(struct st_context *st);
+
+extern void
+st_destroy_pbo_upload(struct st_context *st);
 
 #endif /* ST_CB_TEXTURE_H */
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ff9135f0e6e..6c50a31dbd2 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -136,6 +136,7 @@ st_destroy_context_priv(struct st_context *st)
    st_destroy_drawpix(st);
    st_destroy_drawtex(st);
    st_destroy_perfmon(st);
+   st_destroy_pbo_upload(st);
 
    for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) {
       for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) {
@@ -209,6 +210,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
    st_init_bitmap(st);
    st_init_clear(st);
    st_init_draw( st );
+   st_init_pbo_upload(st);
 
    /* Choose texture target for glDrawPixels, glBitmap, renderbuffers */
    if (pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9c6980fc425..fa0fa104a9a 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -204,6 +204,16 @@ struct st_context
       void *gs_layered;
    } clear;
 
+   /* For gl(Compressed)Tex(Sub)Image */
+   struct {
+      struct pipe_rasterizer_state raster;
+      struct pipe_blend_state blend;
+      void *vs;
+      void *fs;
+      bool enabled;
+      bool rgba_only;
+   } pbo_upload;
+
    /** used for anything using util_draw_vertex_buffer */
    struct pipe_vertex_element velems_util_draw[3];
 
-- 
2.30.2