#include "state_tracker/st_context.h"
#include "state_tracker/st_pbo.h"
+#include "state_tracker/st_cb_bufferobjects.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "cso_cache/cso_context.h"
#include "tgsi/tgsi_ureg.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
+
+/* Integer conversion that the PBO fragment shader applies to each texel.
+ *
+ * The values double as indices into the per-conversion shader caches, so
+ * they are kept dense and zero-based; ST_NUM_PBO_CONVERSIONS is the count.
+ */
+enum st_pbo_conversion {
+   ST_PBO_CONVERT_NONE = 0,
+   ST_PBO_CONVERT_UINT_TO_SINT = 1,
+   ST_PBO_CONVERT_SINT_TO_UINT = 2,
+
+   ST_NUM_PBO_CONVERSIONS
+};
+
+/* Finish filling in *addr for a transfer starting buf_offset pixels into buf.
+ *
+ * The start is moved back to the previous position that satisfies the
+ * texture buffer offset alignment; the pixels skipped that way are added to
+ * the x offset constant handed to the fragment shader.
+ *
+ * Returns false if the PBO path cannot be used: either the misalignment is
+ * not a whole number of pixels, or the element range is larger than the
+ * maximum texture buffer size.
+ */
+bool
+st_pbo_addresses_setup(struct st_context *st,
+                       struct pipe_resource *buf, intptr_t buf_offset,
+                       struct st_pbo_addresses *addr)
+{
+   unsigned skip_pixels = 0;
+
+   /* Byte distance between the requested start and the aligned position
+    * before it.
+    */
+   unsigned misalign =
+      (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment;
+
+   if (misalign != 0) {
+      /* Only whole pixels can be skipped in the shader. */
+      if (misalign % addr->bytes_per_pixel != 0)
+         return false;
+
+      skip_pixels = misalign / addr->bytes_per_pixel;
+      buf_offset -= skip_pixels;
+   }
+
+   assert(buf_offset >= 0);
+
+   addr->buffer = buf;
+   addr->first_element = buf_offset;
+   addr->last_element =
+      buf_offset + skip_pixels + addr->width - 1
+      + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row;
+
+   if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1)
+      return false;
+
+   /* Mesa is expected to have validated the buffer size before calling the
+    * driver callbacks.
+    */
+   assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0);
+
+   /* Constants read by the fragment shader to turn a fragment position into
+    * a buffer element index.
+    */
+   addr->constants.layer_offset = 0;
+   addr->constants.stride = addr->pixels_per_row;
+   addr->constants.image_size = addr->pixels_per_row * addr->image_height;
+   addr->constants.xoffset = -addr->xoffset + skip_pixels;
+   addr->constants.yoffset = -addr->yoffset;
+
+   return true;
+}
+
+/* Derive the buffer addressing for a transfer from GL pixelstore state.
+ *
+ * pixels is interpreted as a byte offset into the buffer object bound in
+ * store. Row stride, image height and the skip parameters of store are
+ * folded into *addr before st_pbo_addresses_setup() completes it.
+ *
+ * Returns false if the addressing cannot be expressed in whole pixels or if
+ * st_pbo_addresses_setup() rejects it, in which case the PBO path must not
+ * be used.
+ */
+bool
+st_pbo_addresses_pixelstore(struct st_context *st,
+                            GLenum gl_target, bool skip_images,
+                            const struct gl_pixelstore_attrib *store,
+                            const void *pixels,
+                            struct st_pbo_addresses *addr)
+{
+   struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer;
+   intptr_t buf_offset = (intptr_t) pixels;
+
+   /* The byte offset has to land on a pixel boundary. */
+   if (buf_offset % addr->bytes_per_pixel != 0)
+      return false;
+
+   /* From here on the offset is counted in texels. */
+   buf_offset /= addr->bytes_per_pixel;
+
+   /* Image height: 1D arrays have one row per layer; otherwise an explicit
+    * ImageHeight overrides the transfer height.
+    */
+   if (gl_target == GL_TEXTURE_1D_ARRAY)
+      addr->image_height = 1;
+   else if (store->ImageHeight > 0)
+      addr->image_height = store->ImageHeight;
+   else
+      addr->image_height = addr->height;
+
+   /* Row stride: RowLength (or the width) padded up to store->Alignment. */
+   unsigned row_pixels = addr->width;
+   if (store->RowLength > 0)
+      row_pixels = store->RowLength;
+
+   unsigned row_bytes = row_pixels * addr->bytes_per_pixel;
+   unsigned excess = row_bytes % store->Alignment;
+
+   if (excess > 0)
+      row_bytes += store->Alignment - excess;
+
+   /* A padded row that is not a whole number of pixels cannot be addressed
+    * by element index.
+    */
+   if (row_bytes % addr->bytes_per_pixel)
+      return false;
+
+   addr->pixels_per_row = row_bytes / addr->bytes_per_pixel;
+
+   /* Apply SkipPixels / SkipRows and, when requested, SkipImages. */
+   unsigned rows_to_skip = store->SkipRows;
+   if (skip_images)
+      rows_to_skip += addr->image_height * store->SkipImages;
+
+   buf_offset += store->SkipPixels + addr->pixels_per_row * rows_to_skip;
+
+   if (!st_pbo_addresses_setup(st, buf, buf_offset, addr))
+      return false;
+
+   /* GL_PACK_INVERT_MESA: start at the last row and walk backwards. */
+   if (store->Invert) {
+      addr->constants.xoffset += (addr->height - 1) * addr->constants.stride;
+      addr->constants.stride = -addr->constants.stride;
+   }
+
+   return true;
+}
+
+/* Flip the Y direction of the destination addressing for a framebuffer
+ * download.
+ *
+ * The caller sets an inverted viewport so that the position seen by the
+ * fragment shader is right for the texel fetches; this function rewrites
+ * the shader constants so the computed destination addresses follow suit:
+ * the x offset is moved by (viewport_height - 1 + 2 * yoffset) rows and the
+ * stride changes sign.
+ */
+void
+st_pbo_addresses_invert_y(struct st_pbo_addresses *addr,
+                          unsigned viewport_height)
+{
+   addr->constants.xoffset = addr->constants.xoffset +
+      (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride;
+
+   addr->constants.stride = -addr->constants.stride;
+}
+
+/* Issue the draw call that performs a PBO upload or download.
+ *
+ * Binds the vertex-pipeline shaders, a freshly uploaded quad covering the
+ * transfer rectangle, the addressing constants for the fragment shader and
+ * the rasterizer state, and disables stream output. A transfer whose depth
+ * is not 1 is drawn instanced with addr->depth instances.
+ *
+ * Saving and restoring state is up to the caller, and so is all remaining
+ * state: fragment shader and samplers, framebuffer, viewport, DSA and blend.
+ *
+ * Returns false if a required shader could not be created or the vertex
+ * upload failed.
+ */
+bool
+st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
+            unsigned surface_width, unsigned surface_height)
+{
+   struct cso_context *cso = st->cso_context;
+   const bool layered = addr->depth != 1;
+
+   /* Create the vertex shader, and the geometry shader if it is needed,
+    * on first use.
+    */
+   if (!st->pbo.vs) {
+      st->pbo.vs = st_pbo_create_vs(st);
+      if (!st->pbo.vs)
+         return false;
+   }
+
+   if (layered && st->pbo.use_gs && !st->pbo.gs) {
+      st->pbo.gs = st_pbo_create_gs(st);
+      if (!st->pbo.gs)
+         return false;
+   }
+
+   cso_set_vertex_shader_handle(cso, st->pbo.vs);
+   cso_set_geometry_shader_handle(cso, layered ? st->pbo.gs : NULL);
+   cso_set_tessctrl_shader_handle(cso, NULL);
+   cso_set_tesseval_shader_handle(cso, NULL);
+
+   /* Quad vertices */
+   {
+      struct pipe_vertex_buffer vbuf = {0};
+      struct pipe_vertex_element elem;
+      float *quad = NULL;
+
+      /* Edges of the transfer rectangle, mapped from surface pixels to the
+       * [-1, 1] range.
+       */
+      const float left = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
+      const float bottom = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
+      const float right = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
+      const float top = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;
+
+      vbuf.stride = 2 * sizeof(float);
+
+      u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4,
+                     &vbuf.buffer_offset, &vbuf.buffer.resource, (void **) &quad);
+      if (!quad)
+         return false;
+
+      /* Four (x, y) pairs in triangle-strip order. */
+      quad[0] = left;   quad[1] = bottom;
+      quad[2] = left;   quad[3] = top;
+      quad[4] = right;  quad[5] = bottom;
+      quad[6] = right;  quad[7] = top;
+
+      u_upload_unmap(st->pipe->stream_uploader);
+
+      elem.src_format = PIPE_FORMAT_R32G32_FLOAT;
+      elem.vertex_buffer_index = 0;
+      elem.src_offset = 0;
+      elem.instance_divisor = 0;
+
+      cso_set_vertex_elements(cso, 1, &elem);
+      cso_set_vertex_buffers(cso, elem.vertex_buffer_index, 1, &vbuf);
+
+      /* Drop the reference obtained from the uploader. */
+      pipe_resource_reference(&vbuf.buffer.resource, NULL);
+   }
+
+   /* Fragment shader constants, passed as a user buffer */
+   {
+      struct pipe_constant_buffer consts;
+
+      consts.buffer = NULL;
+      consts.buffer_offset = 0;
+      consts.buffer_size = sizeof(addr->constants);
+      consts.user_buffer = &addr->constants;
+
+      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &consts);
+
+      pipe_resource_reference(&consts.buffer, NULL);
+   }
+
+   cso_set_rasterizer(cso, &st->pbo.raster);
+
+   /* No stream output targets */
+   cso_set_stream_outputs(cso, 0, NULL, 0);
+
+   if (layered) {
+      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
+                                0, 4, 0, addr->depth);
+   } else {
+      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+   }
+
+   return true;
+}
void *
st_pbo_create_vs(struct st_context *st)
ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
} else {
/* out_layer = gl_InstanceID */
- ureg_MOV(ureg, out_layer, in_instanceid);
+ ureg_MOV(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
+ ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
}
}
return ureg_create_shader_and_destroy(ureg, st->pipe);
}
+/* Emit the instruction, if any, that clamps the value in *temp in place for
+ * the given integer conversion:
+ *  - signed to unsigned: signed max with 0
+ *  - unsigned to signed: unsigned min with 0x7fffffff
+ * Any other conversion emits nothing.
+ */
+static void
+build_conversion(struct ureg_program *ureg, const struct ureg_dst *temp,
+                 enum st_pbo_conversion conversion)
+{
+   if (conversion == ST_PBO_CONVERT_SINT_TO_UINT) {
+      ureg_IMAX(ureg, *temp, ureg_src(*temp), ureg_imm1i(ureg, 0));
+   } else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT) {
+      ureg_UMIN(ureg, *temp, ureg_src(*temp), ureg_imm1u(ureg, 0x7fffffffu));
+   }
+}
+
+/* Build the fragment shader for one PBO transfer variant.
+ *
+ * Every fragment converts its position into a linear element index into the
+ * buffer. For an upload (download == false) the texel is fetched from the
+ * buffer sampler at that index and written to the color output. For a
+ * download the texel is fetched from the source texture of the given target
+ * at the fragment position and stored to the buffer image at that index.
+ * The requested integer conversion is applied to the fetched value in both
+ * cases.
+ *
+ * Returns NULL if the ureg program could not be created, otherwise the
+ * result of ureg_create_shader_and_destroy().
+ */
+static void *
+create_fs(struct st_context *st, bool download, enum pipe_texture_target target,
+          enum st_pbo_conversion conversion)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct ureg_program *ureg;
+   struct ureg_dst result;
+   struct ureg_dst texel_idx;
+   struct ureg_src tex;
+   struct ureg_src frag_pos;
+   struct ureg_src layer_in;
+   struct ureg_src cb0;
+   struct ureg_src cb1;
+   bool layered = false;
+
+   /* With st->pbo.layers set, uploads always read the layer input, while
+    * downloads read it only for the array, 3D and cube targets.
+    */
+   if (st->pbo.layers) {
+      switch (target) {
+      case PIPE_TEXTURE_1D_ARRAY:
+      case PIPE_TEXTURE_2D_ARRAY:
+      case PIPE_TEXTURE_3D:
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_CUBE_ARRAY:
+         layered = true;
+         break;
+      default:
+         layered = !download;
+         break;
+      }
+   }
+
+   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!ureg)
+      return NULL;
+
+   if (download) {
+      /* A writeonly image needs no explicit format. */
+      struct ureg_src image =
+         ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
+                         true, false);
+
+      result = ureg_dst(image);
+   } else {
+      result = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+   }
+
+   tex = ureg_DECL_sampler(ureg, 0);
+
+   /* The position is a system value or a regular input, depending on the
+    * driver.
+    */
+   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL))
+      frag_pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+   else
+      frag_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                    TGSI_INTERPOLATE_LINEAR);
+
+   if (layered)
+      layer_in = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
+                                    TGSI_INTERPOLATE_CONSTANT);
+
+   cb0 = ureg_DECL_constant(ureg, 0);
+   cb1 = ureg_DECL_constant(ureg, 1);
+   texel_idx = ureg_DECL_temporary(ureg);
+
+   /* Constant layout:
+    *   cb0 = [ -xoffset + skip_pixels, -yoffset, stride, image_size ]
+    *   cb1.x = layer_offset
+    */
+
+   /* texel_idx.xy = f2i(pos.xy) */
+   ureg_F2I(ureg, ureg_writemask(texel_idx, TGSI_WRITEMASK_XY),
+            ureg_swizzle(frag_pos,
+                         TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                         TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* texel_idx.xy += cb0.xy */
+   ureg_UADD(ureg, ureg_writemask(texel_idx, TGSI_WRITEMASK_XY),
+             ureg_swizzle(ureg_src(texel_idx),
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+             ureg_swizzle(cb0,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* texel_idx.x = stride * texel_idx.y + texel_idx.x */
+   ureg_UMAD(ureg, ureg_writemask(texel_idx, TGSI_WRITEMASK_X),
+             ureg_scalar(cb0, TGSI_SWIZZLE_Z),
+             ureg_scalar(ureg_src(texel_idx), TGSI_SWIZZLE_Y),
+             ureg_scalar(ureg_src(texel_idx), TGSI_SWIZZLE_X));
+
+   if (layered) {
+      /* texel_idx.x = cb0.w * layer + texel_idx.x */
+      ureg_UMAD(ureg, ureg_writemask(texel_idx, TGSI_WRITEMASK_X),
+                ureg_scalar(cb0, TGSI_SWIZZLE_W),
+                ureg_scalar(layer_in, TGSI_SWIZZLE_X),
+                ureg_scalar(ureg_src(texel_idx), TGSI_SWIZZLE_X));
+   }
+
+   /* texel_idx.w = 0 */
+   ureg_MOV(ureg, ureg_writemask(texel_idx, TGSI_WRITEMASK_W),
+            ureg_imm1u(ureg, 0));
+
+   if (!download) {
+      /* Upload: fetch from the buffer at texel_idx.x, convert, and write
+       * the color output.
+       */
+      ureg_TXF(ureg, texel_idx, TGSI_TEXTURE_BUFFER, ureg_src(texel_idx), tex);
+
+      build_conversion(ureg, &texel_idx, conversion);
+
+      ureg_MOV(ureg, result, ureg_src(texel_idx));
+   } else {
+      struct ureg_dst coord = ureg_DECL_temporary(ureg);
+      struct ureg_src store_args[2];
+
+      /* coord.xy = f2i(pos.xy) */
+      ureg_F2I(ureg, ureg_writemask(coord, TGSI_WRITEMASK_XY), frag_pos);
+
+      /* coord.zw = 0 */
+      ureg_MOV(ureg, ureg_writemask(coord, TGSI_WRITEMASK_ZW),
+               ureg_imm1u(ureg, 0));
+
+      if (layered) {
+         /* 1D arrays take the layer in y, every other target in z. */
+         struct ureg_dst coord_layer =
+            ureg_writemask(coord, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
+                                                                  : TGSI_WRITEMASK_Z);
+
+         ureg_MOV(ureg, coord_layer, ureg_scalar(layer_in, TGSI_SWIZZLE_X));
+
+         if (target == PIPE_TEXTURE_3D) {
+            /* coord.z += layer_offset */
+            ureg_UADD(ureg, coord_layer,
+                      ureg_scalar(ureg_src(coord), TGSI_SWIZZLE_Z),
+                      ureg_scalar(cb1, TGSI_SWIZZLE_X));
+         }
+      }
+
+      /* coord = txf(sampler, coord): the coordinate register is reused for
+       * the fetched texel.
+       */
+      ureg_TXF(ureg, coord, util_pipe_tex_to_tgsi_tex(target, 1),
+               ureg_src(coord), tex);
+
+      build_conversion(ureg, &coord, conversion);
+
+      /* store(image, texel_idx, coord) */
+      store_args[0] = ureg_src(texel_idx);
+      store_args[1] = ureg_src(coord);
+      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &result, 1, store_args, 2, 0,
+                       TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);
+
+      ureg_release_temporary(ureg, coord);
+   }
+
+   ureg_release_temporary(ureg, texel_idx);
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+/* Pick the shader conversion for a source/destination format pair.
+ *
+ * A conversion is needed only between a pure unsigned integer format and a
+ * pure signed integer format, in either direction; every other pair maps to
+ * ST_PBO_CONVERT_NONE.
+ */
+static enum st_pbo_conversion
+get_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format)
+{
+   if (util_format_is_pure_uint(src_format)) {
+      return util_format_is_pure_sint(dst_format) ? ST_PBO_CONVERT_UINT_TO_SINT
+                                                  : ST_PBO_CONVERT_NONE;
+   }
+
+   if (util_format_is_pure_sint(src_format)) {
+      return util_format_is_pure_uint(dst_format) ? ST_PBO_CONVERT_SINT_TO_UINT
+                                                  : ST_PBO_CONVERT_NONE;
+   }
+
+   return ST_PBO_CONVERT_NONE;
+}
+
+/* Return the upload fragment shader for the given format pair.
+ *
+ * Shaders are cached in st->pbo.upload_fs, one slot per conversion, and
+ * created on first request. The slot, and therefore the return value,
+ * stays NULL if create_fs() returns NULL.
+ */
+void *
+st_pbo_get_upload_fs(struct st_context *st,
+                     enum pipe_format src_format,
+                     enum pipe_format dst_format)
+{
+   STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS);
+
+   const enum st_pbo_conversion conv = get_pbo_conversion(src_format, dst_format);
+
+   if (st->pbo.upload_fs[conv])
+      return st->pbo.upload_fs[conv];
+
+   st->pbo.upload_fs[conv] = create_fs(st, false, 0, conv);
+   return st->pbo.upload_fs[conv];
+}
+
+/* Return the download fragment shader for the given texture target and
+ * format pair.
+ *
+ * Shaders are cached in st->pbo.download_fs, indexed by conversion and then
+ * by target, and created on first request. The slot, and therefore the
+ * return value, stays NULL if create_fs() returns NULL.
+ */
+void *
+st_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target,
+                       enum pipe_format src_format,
+                       enum pipe_format dst_format)
+{
+   STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS);
+   assert(target < PIPE_MAX_TEXTURE_TYPES);
+
+   const enum st_pbo_conversion conv = get_pbo_conversion(src_format, dst_format);
+
+   if (st->pbo.download_fs[conv][target])
+      return st->pbo.download_fs[conv][target];
+
+   st->pbo.download_fs[conv][target] = create_fs(st, true, target, conv);
+   return st->pbo.download_fs[conv][target];
+}
+
void
st_init_pbo_helpers(struct st_context *st)
{
if (!st->pbo.upload_enabled)
return;
+ st->pbo.download_enabled =
+ st->pbo.upload_enabled &&
+ screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) &&
+ screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
+ screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+ PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1;
+
st->pbo.rgba_only =
screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
void
st_destroy_pbo_helpers(struct st_context *st)
{
- if (st->pbo.upload_fs) {
- cso_delete_fragment_shader(st->cso_context, st->pbo.upload_fs);
- st->pbo.upload_fs = NULL;
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) {
+ if (st->pbo.upload_fs[i]) {
+ cso_delete_fragment_shader(st->cso_context, st->pbo.upload_fs[i]);
+ st->pbo.upload_fs[i] = NULL;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) {
+ for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) {
+ if (st->pbo.download_fs[i][j]) {
+ cso_delete_fragment_shader(st->cso_context, st->pbo.download_fs[i][j]);
+ st->pbo.download_fs[i][j] = NULL;
+ }
+ }
}
if (st->pbo.gs) {