2 * Copyright 2007 VMware, Inc.
3 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * Common helper functions for PBO up- and downloads.
31 #include "state_tracker/st_context.h"
32 #include "state_tracker/st_pbo.h"
33 #include "state_tracker/st_cb_bufferobjects.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_defines.h"
37 #include "pipe/p_screen.h"
38 #include "cso_cache/cso_context.h"
39 #include "tgsi/tgsi_ureg.h"
40 #include "util/u_inlines.h"
41 #include "util/u_upload_mgr.h"
/* Final setup of buffer addressing information.
 *
 * buf_offset is in pixels.
 *
 * Returns false if something (e.g. alignment) prevents PBO upload/download.
 */
/* st_pbo_addresses_setup: derive the final element range and shader
 * constants for a PBO transfer that starts buf_offset pixels into buf.
 *
 * st         - context; ctx->Const.TextureBufferOffsetAlignment and
 *              ctx->Const.MaxTextureBufferSize are consulted.
 * buf        - buffer resource; only width0 is read here, inside an assert.
 * buf_offset - start of the transfer within the buffer, in pixels.
 * addr       - reads bytes_per_pixel, width, height, depth, image_height,
 *              pixels_per_row, xoffset and yoffset; writes first_element,
 *              last_element and the constants.* fields.
 *
 * NOTE(review): the embedded listing numbers skip (52 -> 56, 60 -> 63,
 * 77 -> 80, ...), so the return type, the braces, the declaration of
 * skip_pixels and the statements guarded by the two `if` conditions are
 * not visible in this text. Restore them from the upstream file before
 * relying on this block.
 */
50 st_pbo_addresses_setup(struct st_context
*st
,
51 struct pipe_resource
*buf
, intptr_t buf_offset
,
52 struct st_pbo_addresses
*addr
)
56 /* Check alignment against texture buffer requirements. */
/* ofs is the byte distance of the start position past the previous
 * TextureBufferOffsetAlignment boundary. */
58 unsigned ofs
= (buf_offset
* addr
->bytes_per_pixel
) % st
->ctx
->Const
.TextureBufferOffsetAlignment
;
/* A misalignment that is not a whole number of pixels cannot be expressed
 * as skipped pixels. NOTE(review): the statement this condition guards is
 * missing from the listing. */
60 if (ofs
% addr
->bytes_per_pixel
!= 0)
/* Move the start back by the misalignment (in pixels) and remember how many
 * pixels have to be skipped to reach the real start. */
63 skip_pixels
= ofs
/ addr
->bytes_per_pixel
;
64 buf_offset
-= skip_pixels
;
70 assert(buf_offset
>= 0);
/* Element range covered by the transfer: last_element is the index of the
 * final pixel of the last row of the last image. */
73 addr
->first_element
= buf_offset
;
74 addr
->last_element
= buf_offset
+ skip_pixels
+ addr
->width
- 1
75 + (addr
->height
- 1 + (addr
->depth
- 1) * addr
->image_height
) * addr
->pixels_per_row
;
/* The range must not span more than MaxTextureBufferSize elements.
 * NOTE(review): the guarded statement is missing from the listing. */
77 if (addr
->last_element
- addr
->first_element
> st
->ctx
->Const
.MaxTextureBufferSize
- 1)
80 /* This should be ensured by Mesa before calling our callbacks */
81 assert((addr
->last_element
+ 1) * addr
->bytes_per_pixel
<= buf
->width0
);
/* Constants handed to the fragment shader: negated surface offsets (x is
 * additionally shifted by skip_pixels), row stride and image size in
 * pixels, and a zero layer offset. */
83 addr
->constants
.xoffset
= -addr
->xoffset
+ skip_pixels
;
84 addr
->constants
.yoffset
= -addr
->yoffset
;
85 addr
->constants
.stride
= addr
->pixels_per_row
;
86 addr
->constants
.image_size
= addr
->pixels_per_row
* addr
->image_height
;
87 addr
->constants
.layer_offset
= 0;
/* Validate and fill buffer addressing information based on GL pixelstore
 * attributes.
 *
 * Returns false if some aspect of the addressing (e.g. alignment) prevents
 * PBO upload/download.
 */
/* st_pbo_addresses_pixelstore: fill addr from GL pixelstore state and then
 * finish the setup through st_pbo_addresses_setup().
 *
 * gl_target   - GL_TEXTURE_1D_ARRAY forces addr->image_height to 1.
 * skip_images - declared here, but its use is not visible in this listing
 *               (presumably it gates the SkipImages term - TODO confirm).
 * store       - pixelstore attributes; BufferObj, ImageHeight, RowLength,
 *               Alignment, SkipRows, SkipImages and SkipPixels are read.
 * addr        - reads bytes_per_pixel, width and height; writes
 *               image_height and pixels_per_row, and is then passed on to
 *               st_pbo_addresses_setup().
 *
 * NOTE(review): `pixels` is read below, but its parameter declaration
 * (listing line 102) is missing, as are the return type, braces, `else`
 * keywords and the statements guarded by several `if` conditions.
 */
99 st_pbo_addresses_pixelstore(struct st_context
*st
,
100 GLenum gl_target
, bool skip_images
,
101 const struct gl_pixelstore_attrib
*store
,
103 struct st_pbo_addresses
*addr
)
105 struct pipe_resource
*buf
= st_buffer_object(store
->BufferObj
)->buffer
;
/* The client "pointer" is reinterpreted as an integer byte offset. */
106 intptr_t buf_offset
= (intptr_t) pixels
;
/* The byte offset must be a whole number of pixels.
 * NOTE(review): the guarded statement is missing from the listing. */
108 if (buf_offset
% addr
->bytes_per_pixel
)
111 /* Convert to texels */
112 buf_offset
= buf_offset
/ addr
->bytes_per_pixel
;
114 /* Determine image height */
115 if (gl_target
== GL_TEXTURE_1D_ARRAY
) {
116 addr
->image_height
= 1;
/* NOTE(review): the `else` separating these two assignments is not
 * visible. An explicit store->ImageHeight wins over addr->height. */
118 addr
->image_height
= store
->ImageHeight
> 0 ? store
->ImageHeight
: addr
->height
;
121 /* Compute the stride, taking store->Alignment into account */
123 unsigned pixels_per_row
= store
->RowLength
> 0 ?
124 store
->RowLength
: addr
->width
;
125 unsigned bytes_per_row
= pixels_per_row
* addr
->bytes_per_pixel
;
126 unsigned remainder
= bytes_per_row
% store
->Alignment
;
127 unsigned offset_rows
;
/* Pad the row up to the next multiple of store->Alignment.
 * NOTE(review): the condition on `remainder` (listing line 129) is not
 * visible here. */
130 bytes_per_row
+= store
->Alignment
- remainder
;
/* The padded row must still be a whole number of pixels.
 * NOTE(review): the guarded statement is missing from the listing. */
132 if (bytes_per_row
% addr
->bytes_per_pixel
)
135 addr
->pixels_per_row
= bytes_per_row
/ addr
->bytes_per_pixel
;
/* Rows to skip: SkipRows, plus image_height rows per skipped image. */
137 offset_rows
= store
->SkipRows
;
139 offset_rows
+= addr
->image_height
* store
->SkipImages
;
/* Advance the start by the skipped pixels and rows, all in pixels. */
141 buf_offset
+= store
->SkipPixels
+ addr
->pixels_per_row
* offset_rows
;
144 if (!st_pbo_addresses_setup(st
, buf
, buf_offset
, addr
))
147 /* Support GL_PACK_INVERT_MESA */
/* Start addressing at the last row and walk backwards: xoffset moves by
 * (height - 1) rows and the stride is negated.
 * NOTE(review): the enabling condition (listing line 148) is not visible. */
149 addr
->constants
.xoffset
+= (addr
->height
- 1) * addr
->constants
.stride
;
150 addr
->constants
.stride
= -addr
->constants
.stride
;
/* For download from a framebuffer, we may have to invert the Y axis. The
 * setup is as follows:
 * - set the viewport to inverted, so that the position sysval is correct;
 * - this function adjusts the fragment shader's constant buffer to compute
 *   the correct destination addresses.
 */
/* st_pbo_addresses_invert_y: flip the row order encoded in addr->constants.
 *
 * addr            - constants.xoffset and constants.stride are updated in
 *                   place; constants.yoffset is only read.
 * viewport_height - height used to mirror the row index.
 *
 * Adds (viewport_height - 1 + 2 * constants.yoffset) * constants.stride to
 * constants.xoffset and then negates constants.stride.
 *
 * NOTE(review): the return type and braces are missing from this listing.
 */
164 st_pbo_addresses_invert_y(struct st_pbo_addresses
*addr
,
165 unsigned viewport_height
)
167 addr
->constants
.xoffset
+=
168 (viewport_height
- 1 + 2 * addr
->constants
.yoffset
) * addr
->constants
.stride
;
169 addr
->constants
.stride
= -addr
->constants
.stride
;
/* Setup all vertex pipeline state, rasterizer state, and fragment shader
 * constants, and issue the draw call for PBO upload/download.
 *
 * The caller is responsible for saving and restoring state, as well as for
 * setting other fragment shader state (fragment shader, samplers), and
 * framebuffer/viewport/DSA/blend state.
 */
/* st_pbo_draw: bind the PBO vertex/geometry shaders, upload one quad and
 * the addressing constants, set rasterizer state and issue the draw.
 *
 * st             - context; uses cso_context, uploader, constbuf_uploader
 *                  and the cached objects in st->pbo.
 * addr           - xoffset/yoffset/width/height give the quad rectangle in
 *                  pixels, depth the number of layers (instances), and
 *                  constants the fragment shader constant buffer contents.
 * surface_width,
 * surface_height - size used to map pixel coordinates onto [-1, 1].
 *
 * NOTE(review): many lines are absent from this listing: the return type,
 * braces, the declaration of `verts` and the code that fills it, `else`
 * keywords, and any checks that may surround shader creation and
 * u_upload_alloc. Restore them from the upstream file.
 */
180 st_pbo_draw(struct st_context
*st
, const struct st_pbo_addresses
*addr
,
181 unsigned surface_width
, unsigned surface_height
)
183 struct cso_context
*cso
= st
->cso_context
;
185 /* Setup vertex and geometry shaders */
/* NOTE(review): whatever condition guards this creation is not visible. */
187 st
->pbo
.vs
= st_pbo_create_vs(st
);
/* The geometry shader is created on first use, and only for multi-layer
 * draws when st->pbo.use_gs is set. */
192 if (addr
->depth
!= 1 && st
->pbo
.use_gs
&& !st
->pbo
.gs
) {
193 st
->pbo
.gs
= st_pbo_create_gs(st
);
198 cso_set_vertex_shader_handle(cso
, st
->pbo
.vs
);
/* st->pbo.gs is bound for multi-layer draws, NULL otherwise; it may still
 * be NULL here when use_gs is not set. */
200 cso_set_geometry_shader_handle(cso
, addr
->depth
!= 1 ? st
->pbo
.gs
: NULL
);
202 cso_set_tessctrl_shader_handle(cso
, NULL
);
204 cso_set_tesseval_shader_handle(cso
, NULL
);
206 /* Upload vertices */
208 struct pipe_vertex_buffer vbo
;
209 struct pipe_vertex_element velem
;
/* Quad corners in normalized device coordinates:
 * pixel coordinate / surface size * 2 - 1. */
211 float x0
= (float) addr
->xoffset
/ surface_width
* 2.0f
- 1.0f
;
212 float y0
= (float) addr
->yoffset
/ surface_height
* 2.0f
- 1.0f
;
213 float x1
= (float) (addr
->xoffset
+ addr
->width
) / surface_width
* 2.0f
- 1.0f
;
214 float y1
= (float) (addr
->yoffset
+ addr
->height
) / surface_height
* 2.0f
- 1.0f
;
218 vbo
.user_buffer
= NULL
;
/* Each vertex is two floats. */
220 vbo
.stride
= 2 * sizeof(float);
/* Reserve room for 4 vertices (8 floats) from the upload manager.
 * NOTE(review): the code writing x0/y0/x1/y1 into `verts` (listing lines
 * 224-235) is missing. */
222 u_upload_alloc(st
->uploader
, 0, 8 * sizeof(float), 4,
223 &vbo
.buffer_offset
, &vbo
.buffer
, (void **) &verts
);
236 u_upload_unmap(st
->uploader
);
/* Single vertex element: an R32G32 float pair read from the start of each
 * vertex, sourced from the cso auxiliary vertex buffer slot. */
238 velem
.src_offset
= 0;
239 velem
.instance_divisor
= 0;
240 velem
.vertex_buffer_index
= cso_get_aux_vertex_buffer_slot(cso
);
241 velem
.src_format
= PIPE_FORMAT_R32G32_FLOAT
;
243 cso_set_vertex_elements(cso
, 1, &vbo == NULL ? &velem : &velem
);
245 cso_set_vertex_buffers(cso
, velem
.vertex_buffer_index
, 1, &vbo
);
/* Release this function's reference to the uploaded vertex buffer. */
247 pipe_resource_reference(&vbo
.buffer
, NULL
);
250 /* Upload constants */
252 struct pipe_constant_buffer cb
;
/* With a constant-buffer uploader the constants are copied into a real
 * buffer. The later user_buffer/buffer_offset assignments presumably belong
 * to a missing `else` branch that passes the constants as a user buffer -
 * TODO confirm against upstream. */
254 if (st
->constbuf_uploader
) {
256 cb
.user_buffer
= NULL
;
257 u_upload_data(st
->constbuf_uploader
, 0, sizeof(addr
->constants
),
258 st
->ctx
->Const
.UniformBufferOffsetAlignment
,
259 &addr
->constants
, &cb
.buffer_offset
, &cb
.buffer
);
263 u_upload_unmap(st
->constbuf_uploader
);
266 cb
.user_buffer
= &addr
->constants
;
267 cb
.buffer_offset
= 0;
269 cb
.buffer_size
= sizeof(addr
->constants
);
/* Bind as fragment shader constant buffer slot 0. */
271 cso_set_constant_buffer(cso
, PIPE_SHADER_FRAGMENT
, 0, &cb
);
273 pipe_resource_reference(&cb
.buffer
, NULL
);
276 /* Rasterizer state */
277 cso_set_rasterizer(cso
, &st
->pbo
.raster
);
279 /* Disable stream output */
280 cso_set_stream_outputs(cso
, 0, NULL
, 0);
/* One plain 4-vertex strip for a single layer; the instanced call draws
 * addr->depth instances. NOTE(review): the `else` between the two draw
 * calls is not visible. */
282 if (addr
->depth
== 1) {
283 cso_draw_arrays(cso
, PIPE_PRIM_TRIANGLE_STRIP
, 0, 4);
285 cso_draw_arrays_instanced(cso
, PIPE_PRIM_TRIANGLE_STRIP
,
286 0, 4, 0, addr
->depth
);
/* st_pbo_create_vs: build the vertex shader used for PBO draws with the
 * ureg TGSI builder.
 *
 * The shader copies the position input to the position output. When
 * st->pbo.layers is set it also reads the instance id: with st->pbo.use_gs
 * the id is written, converted to float, into out_pos.z; the MOV into the
 * layer output presumably sits in the missing `else` branch (TODO confirm).
 *
 * Returns the result of ureg_create_shader_and_destroy(ureg, st->pipe).
 *
 * NOTE(review): the return type, braces, `else` keywords and any failure
 * check after ureg_create are missing from this listing.
 */
293 st_pbo_create_vs(struct st_context
*st
)
295 struct ureg_program
*ureg
;
296 struct ureg_src in_pos
;
297 struct ureg_src in_instanceid
;
298 struct ureg_dst out_pos
;
299 struct ureg_dst out_layer
;
301 ureg
= ureg_create(PIPE_SHADER_VERTEX
);
305 in_pos
= ureg_DECL_vs_input(ureg
, TGSI_SEMANTIC_POSITION
);
307 out_pos
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_POSITION
, 0);
/* The instance id (and a layer output) are only declared when layered
 * draws are enabled. NOTE(review): listing lines 311-312 are missing, so
 * the condition around the layer output declaration is not visible. */
309 if (st
->pbo
.layers
) {
310 in_instanceid
= ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_INSTANCEID
, 0);
313 out_layer
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_LAYER
, 0);
316 /* out_pos = in_pos */
317 ureg_MOV(ureg
, out_pos
, in_pos
);
319 if (st
->pbo
.layers
) {
320 if (st
->pbo
.use_gs
) {
321 /* out_pos.z = i2f(gl_InstanceID) */
322 ureg_I2F(ureg
, ureg_writemask(out_pos
, TGSI_WRITEMASK_Z
),
323 ureg_scalar(in_instanceid
, TGSI_SWIZZLE_X
));
325 /* out_layer = gl_InstanceID */
326 ureg_MOV(ureg
, out_layer
, in_instanceid
);
332 return ureg_create_shader_and_destroy(ureg
, st
->pipe
);
/* st_pbo_create_gs: build the geometry shader used for layered PBO draws
 * with the ureg TGSI builder.
 *
 * The shader takes triangles and outputs a triangle strip of at most 3
 * vertices. For each of the 3 input vertices it copies the position,
 * writes f2i(position.z) to the layer output's x component and emits the
 * vertex.
 *
 * Returns the result of ureg_create_shader_and_destroy(ureg, st->pipe).
 *
 * NOTE(review): the declarations of `imm` and `i`, the return type, braces
 * and any failure check after ureg_create are missing from this listing.
 */
336 st_pbo_create_gs(struct st_context
*st
)
338 static const int zero
= 0;
339 struct ureg_program
*ureg
;
340 struct ureg_dst out_pos
;
341 struct ureg_dst out_layer
;
342 struct ureg_src in_pos
;
346 ureg
= ureg_create(PIPE_SHADER_GEOMETRY
);
350 ureg_property(ureg
, TGSI_PROPERTY_GS_INPUT_PRIM
, PIPE_PRIM_TRIANGLES
);
351 ureg_property(ureg
, TGSI_PROPERTY_GS_OUTPUT_PRIM
, PIPE_PRIM_TRIANGLE_STRIP
);
352 ureg_property(ureg
, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
, 3);
354 out_pos
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_POSITION
, 0);
355 out_layer
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_LAYER
, 0);
357 in_pos
= ureg_DECL_input(ureg
, TGSI_SEMANTIC_POSITION
, 0, 0, 1);
/* Integer immediate 0, used as the operand of every EMIT below. */
359 imm
= ureg_DECL_immediate_int(ureg
, &zero
, 1);
/* One iteration per vertex of the input triangle. */
361 for (i
= 0; i
< 3; ++i
) {
362 struct ureg_src in_pos_vertex
= ureg_src_dimension(in_pos
, i
);
364 /* out_pos = in_pos[i] */
365 ureg_MOV(ureg
, out_pos
, in_pos_vertex
);
367 /* out_layer.x = f2i(in_pos[i].z) */
368 ureg_F2I(ureg
, ureg_writemask(out_layer
, TGSI_WRITEMASK_X
),
369 ureg_scalar(in_pos_vertex
, TGSI_SWIZZLE_Z
));
371 ureg_EMIT(ureg
, ureg_scalar(imm
, TGSI_SWIZZLE_X
));
376 return ureg_create_shader_and_destroy(ureg
, st
->pipe
);
/* create_fs: build the fragment shader for PBO upload (download == false)
 * or download (download == true) with the ureg TGSI builder.
 *
 * st       - context; st->pipe and its screen are used.
 * download - selects the download variant; see st_pbo_get_upload_fs() and
 *            st_pbo_get_download_fs() for the two callers.
 * target   - texture target, used for the layer write mask and the TXF
 *            texture type in the texture-fetch-and-store path.
 *
 * Visible address computation, on integers in temp0:
 *   temp0.xy = f2i(position.xy) + const0.xy
 *   temp0.x  = const0.z * temp0.y + temp0.x
 *   temp0.x  = const0.w * layer + temp0.x
 * One path then fetches a texel from the texture at the fragment position
 * and STOREs it into a buffer image at temp0; the other writes
 * txf(buffer sampler, temp0.x) to the colour output.
 *
 * Returns the result of ureg_create_shader_and_destroy(ureg, pipe).
 *
 * NOTE(review): this listing has lost most branch conditions, several
 * declarations (`out`, `pos`, the layer flag) and some operands, so which
 * statements run for upload vs. download cannot be fully determined from
 * this text. Restore the block from the upstream file.
 */
380 create_fs(struct st_context
*st
, bool download
, enum pipe_texture_target target
)
382 struct pipe_context
*pipe
= st
->pipe
;
383 struct pipe_screen
*screen
= pipe
->screen
;
384 struct ureg_program
*ureg
;
387 struct ureg_src sampler
;
389 struct ureg_src layer
;
390 struct ureg_src const0
;
391 struct ureg_src const1
;
392 struct ureg_dst temp0
;
/* NOTE(review): tail of a boolean expression whose left-hand side (listing
 * lines 394-395) is missing. The visible part is true for uploads and for
 * array, 3D and cube download targets. */
396 (!download
|| target
== PIPE_TEXTURE_1D_ARRAY
397 || target
== PIPE_TEXTURE_2D_ARRAY
398 || target
== PIPE_TEXTURE_3D
399 || target
== PIPE_TEXTURE_CUBE
400 || target
== PIPE_TEXTURE_CUBE_ARRAY
);
402 ureg
= ureg_create(PIPE_SHADER_FRAGMENT
);
/* `out` is either a colour output or a buffer image used as the STORE
 * destination. NOTE(review): the selecting condition and the end of the
 * ureg_DECL_image call (listing line 413) are not visible. */
407 out
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
409 struct ureg_src image
;
411 /* writeonly images do not require an explicitly given format. */
412 image
= ureg_DECL_image(ureg
, 0, TGSI_TEXTURE_BUFFER
, PIPE_FORMAT_NONE
,
414 out
= ureg_dst(image
);
417 sampler
= ureg_DECL_sampler(ureg
, 0);
/* The fragment position is a system value when the driver reports
 * PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL; the linearly interpolated input
 * presumably sits in the missing `else` branch. */
418 if (screen
->get_param(screen
, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL
)) {
419 pos
= ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
421 pos
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
, 0,
422 TGSI_INTERPOLATE_LINEAR
);
/* NOTE(review): the condition around the layer input (listing line 424) is
 * not visible. */
425 layer
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_LAYER
, 0,
426 TGSI_INTERPOLATE_CONSTANT
);
428 const0
= ureg_DECL_constant(ureg
, 0);
429 const1
= ureg_DECL_constant(ureg
, 1);
430 temp0
= ureg_DECL_temporary(ureg
);
432 /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */
434 /* temp0.xy = f2i(temp0.xy) */
/* NOTE(review): the source operand line (listing line 436) is missing;
 * only its swizzle arguments remain. */
435 ureg_F2I(ureg
, ureg_writemask(temp0
, TGSI_WRITEMASK_XY
),
437 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
,
438 TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Y
));
440 /* temp0.xy = temp0.xy + const0.xy */
/* NOTE(review): the second source operand line (listing line 445) is
 * missing; only its swizzle arguments remain. */
441 ureg_UADD(ureg
, ureg_writemask(temp0
, TGSI_WRITEMASK_XY
),
442 ureg_swizzle(ureg_src(temp0
),
443 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
,
444 TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Y
),
446 TGSI_SWIZZLE_X
, TGSI_SWIZZLE_Y
,
447 TGSI_SWIZZLE_Y
, TGSI_SWIZZLE_Y
));
449 /* temp0.x = const0.z * temp0.y + temp0.x */
450 ureg_UMAD(ureg
, ureg_writemask(temp0
, TGSI_WRITEMASK_X
),
451 ureg_scalar(const0
, TGSI_SWIZZLE_Z
),
452 ureg_scalar(ureg_src(temp0
), TGSI_SWIZZLE_Y
),
453 ureg_scalar(ureg_src(temp0
), TGSI_SWIZZLE_X
));
/* NOTE(review): the condition guarding the layer term (listing line 455)
 * is not visible. */
456 /* temp0.x = const0.w * layer + temp0.x */
457 ureg_UMAD(ureg
, ureg_writemask(temp0
, TGSI_WRITEMASK_X
),
458 ureg_scalar(const0
, TGSI_SWIZZLE_W
),
459 ureg_scalar(layer
, TGSI_SWIZZLE_X
),
460 ureg_scalar(ureg_src(temp0
), TGSI_SWIZZLE_X
));
/* temp0.w = 0 */
464 ureg_MOV(ureg
, ureg_writemask(temp0
, TGSI_WRITEMASK_W
), ureg_imm1u(ureg
, 0));
/* Texture-fetch-and-store path (presumably the download case; the
 * condition on listing line 466 is not visible): temp1 holds the integer
 * texel coordinate, then the fetched texel. */
467 struct ureg_dst temp1
;
468 struct ureg_src op
[2];
470 temp1
= ureg_DECL_temporary(ureg
);
472 /* temp1.xy = pos.xy */
473 ureg_F2I(ureg
, ureg_writemask(temp1
, TGSI_WRITEMASK_XY
), pos
);
/* temp1.zw = 0 */
476 ureg_MOV(ureg
, ureg_writemask(temp1
, TGSI_WRITEMASK_ZW
), ureg_imm1u(ureg
, 0));
/* The layer index is written to y for 1D arrays. NOTE(review): the other
 * arm of the ternary (listing line 481) is missing. */
479 struct ureg_dst temp1_layer
=
480 ureg_writemask(temp1
, target
== PIPE_TEXTURE_1D_ARRAY
? TGSI_WRITEMASK_Y
483 /* temp1.y/z = layer */
484 ureg_MOV(ureg
, temp1_layer
, ureg_scalar(layer
, TGSI_SWIZZLE_X
));
486 if (target
== PIPE_TEXTURE_3D
) {
487 /* temp1.z += layer_offset */
488 ureg_UADD(ureg
, temp1_layer
,
489 ureg_scalar(ureg_src(temp1
), TGSI_SWIZZLE_Z
),
490 ureg_scalar(const1
, TGSI_SWIZZLE_X
));
494 /* temp1 = txf(sampler, temp1) */
495 ureg_TXF(ureg
, temp1
, util_pipe_tex_to_tgsi_tex(target
, 1),
496 ureg_src(temp1
), sampler
);
498 /* store(out, temp0, temp1) */
499 op
[0] = ureg_src(temp0
);
500 op
[1] = ureg_src(temp1
);
501 ureg_memory_insn(ureg
, TGSI_OPCODE_STORE
, &out
, 1, op
, 2, 0,
502 TGSI_TEXTURE_BUFFER
, PIPE_FORMAT_NONE
);
504 ureg_release_temporary(ureg
, temp1
);
/* Buffer-fetch path (presumably the upload case; the `else` on listing
 * line 505 is not visible). */
506 /* out = txf(sampler, temp0.x) */
507 ureg_TXF(ureg
, out
, TGSI_TEXTURE_BUFFER
, ureg_src(temp0
), sampler
);
510 ureg_release_temporary(ureg
, temp0
);
514 return ureg_create_shader_and_destroy(ureg
, pipe
);
/* st_pbo_get_upload_fs: return the cached upload fragment shader, creating
 * it with create_fs(st, false, 0) the first time st->pbo.upload_fs is
 * found to be NULL.
 *
 * NOTE(review): the return type and braces are missing from this listing.
 */
518 st_pbo_get_upload_fs(struct st_context
*st
)
520 if (!st
->pbo
.upload_fs
)
521 st
->pbo
.upload_fs
= create_fs(st
, false, 0);
523 return st
->pbo
.upload_fs
;
/* st_pbo_get_download_fs: return the cached download fragment shader for
 * the given texture target, creating it with create_fs(st, true, target)
 * the first time st->pbo.download_fs[target] is found to be NULL.
 *
 * target must be below PIPE_MAX_TEXTURE_TYPES (asserted); it indexes the
 * per-target cache array.
 *
 * NOTE(review): the return type and braces are missing from this listing.
 */
527 st_pbo_get_download_fs(struct st_context
*st
, enum pipe_texture_target target
)
529 assert(target
< PIPE_MAX_TEXTURE_TYPES
);
531 if (!st
->pbo
.download_fs
[target
])
532 st
->pbo
.download_fs
[target
] = create_fs(st
, true, target
);
534 return st
->pbo
.download_fs
[target
];
/* st_init_pbo_helpers: query driver capabilities and initialise the PBO
 * helper state in st->pbo.
 *
 * Sets upload_enabled, download_enabled, layers and use_gs from screen
 * capability queries, and initialises the upload blend state and the
 * rasterizer state.
 *
 * NOTE(review): the return type, braces, the statement guarded by
 * `if (!st->pbo.upload_enabled)` and the left-hand side of the
 * PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY query (listing line 557) are
 * missing from this listing.
 */
538 st_init_pbo_helpers(struct st_context
*st
)
540 struct pipe_context
*pipe
= st
->pipe
;
541 struct pipe_screen
*screen
= pipe
->screen
;
/* Upload needs texture buffer objects, a texture buffer offset alignment
 * cap of at least 1, and integer support in fragment shaders. */
543 st
->pbo
.upload_enabled
=
544 screen
->get_param(screen
, PIPE_CAP_TEXTURE_BUFFER_OBJECTS
) &&
545 screen
->get_param(screen
, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
) >= 1 &&
546 screen
->get_shader_param(screen
, PIPE_SHADER_FRAGMENT
, PIPE_SHADER_CAP_INTEGERS
);
547 if (!st
->pbo
.upload_enabled
)
/* Download additionally needs sampler view targets, attachment-less
 * framebuffers and at least one fragment shader image. */
550 st
->pbo
.download_enabled
=
551 st
->pbo
.upload_enabled
&&
552 screen
->get_param(screen
, PIPE_CAP_SAMPLER_VIEW_TARGET
) &&
553 screen
->get_param(screen
, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT
) &&
554 screen
->get_shader_param(screen
, PIPE_SHADER_FRAGMENT
,
555 PIPE_SHADER_CAP_MAX_SHADER_IMAGES
) >= 1;
/* NOTE(review): the field receiving this query result is not visible. */
558 screen
->get_param(screen
, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY
);
/* Layered draws need the instance id, plus either layer output from the
 * vertex shader or a geometry shader able to emit at least 3 vertices. */
560 if (screen
->get_param(screen
, PIPE_CAP_TGSI_INSTANCEID
)) {
561 if (screen
->get_param(screen
, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT
)) {
562 st
->pbo
.layers
= true;
563 } else if (screen
->get_param(screen
, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES
) >= 3) {
564 st
->pbo
.layers
= true;
565 st
->pbo
.use_gs
= true;
/* Blend state for uploads: zeroed, with all RGBA channels enabled in the
 * colour mask of render target 0. */
570 memset(&st
->pbo
.upload_blend
, 0, sizeof(struct pipe_blend_state
));
571 st
->pbo
.upload_blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
573 /* Rasterizer state */
574 memset(&st
->pbo
.raster
, 0, sizeof(struct pipe_rasterizer_state
));
575 st
->pbo
.raster
.half_pixel_center
= 1;
/* st_destroy_pbo_helpers: delete the shaders cached in st->pbo.
 *
 * Deletes the upload fragment shader and every non-NULL entry of the
 * download_fs array through the cso context, resetting those pointers to
 * NULL, then deletes the geometry and vertex shaders.
 *
 * NOTE(review): the return type, braces and the declaration of `i` are
 * missing from this listing, and any guards around the geometry/vertex
 * shader deletes are not visible. The block may also continue past the
 * last visible line.
 */
579 st_destroy_pbo_helpers(struct st_context
*st
)
583 if (st
->pbo
.upload_fs
) {
584 cso_delete_fragment_shader(st
->cso_context
, st
->pbo
.upload_fs
);
585 st
->pbo
.upload_fs
= NULL
;
/* Walk every per-target download shader slot. */
588 for (i
= 0; i
< ARRAY_SIZE(st
->pbo
.download_fs
); ++i
) {
589 if (st
->pbo
.download_fs
[i
]) {
590 cso_delete_fragment_shader(st
->cso_context
, st
->pbo
.download_fs
[i
]);
591 st
->pbo
.download_fs
[i
] = NULL
;
596 cso_delete_geometry_shader(st
->cso_context
, st
->pbo
.gs
);
601 cso_delete_vertex_shader(st
->cso_context
, st
->pbo
.vs
);