1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
35 #include "lp_scene_queue.h"
41 #include "lp_rast_priv.h"
42 #include "lp_tile_soa.h"
43 #include "gallivm/lp_bld_debug.h"
49 const struct lp_rast_state
*jit_state
= NULL
;
54 * Begin rasterizing a scene.
55 * Called once per scene by one thread.
58 lp_rast_begin( struct lp_rasterizer
*rast
,
59 struct lp_scene
*scene
)
62 rast
->curr_scene
= scene
;
64 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
66 lp_scene_begin_rasterization( scene
);
67 lp_scene_bin_iter_begin( scene
);
72 lp_rast_end( struct lp_rasterizer
*rast
)
74 lp_scene_end_rasterization( rast
->curr_scene
);
76 rast
->curr_scene
= NULL
;
80 debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n",
81 lp_tile_unswizzle_count
, lp_tile_swizzle_count
);
87 * Beginning rasterization of a tile.
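88 * \param task  rasterizer task; task->x and task->y are set to the tile's window position, in pixels
89 * \param bin  the bin being rasterized; bin->x and bin->y are multiplied by TILE_SIZE to get that position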
92 lp_rast_tile_begin(struct lp_rasterizer_task
*task
,
93 const struct cmd_bin
*bin
)
95 const struct lp_scene
*scene
= task
->scene
;
96 enum lp_texture_usage usage
;
98 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, bin
->x
, bin
->y
);
101 task
->x
= bin
->x
* TILE_SIZE
;
102 task
->y
= bin
->y
* TILE_SIZE
;
104 /* reset pointers to color tile(s) */
105 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
107 /* get pointer to depth/stencil tile */
109 struct pipe_surface
*zsbuf
= task
->scene
->fb
.zsbuf
;
111 struct llvmpipe_resource
*lpt
= llvmpipe_resource(zsbuf
->texture
);
113 if (scene
->has_depthstencil_clear
)
114 usage
= LP_TEX_USAGE_WRITE_ALL
;
116 usage
= LP_TEX_USAGE_READ_WRITE
;
118 /* "prime" the tile: convert data from linear to tiled if necessary
119 * and update the tile's layout info.
121 (void) llvmpipe_get_texture_tile(lpt
,
122 zsbuf
->face
+ zsbuf
->zslice
,
127 /* Get actual pointer to the tile data. Note that depth/stencil
128 * data is tiled differently than color data.
130 task
->depth_tile
= lp_rast_get_depth_block_pointer(task
,
134 assert(task
->depth_tile
);
137 task
->depth_tile
= NULL
;
144 * Clear the rasterizer's current color tile.
145 * This is a bin command called during bin processing.
148 lp_rast_clear_color(struct lp_rasterizer_task
*task
,
149 const union lp_rast_cmd_arg arg
)
151 const struct lp_scene
*scene
= task
->scene
;
152 const uint8_t *clear_color
= arg
.clear_color
;
156 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
162 if (clear_color
[0] == clear_color
[1] &&
163 clear_color
[1] == clear_color
[2] &&
164 clear_color
[2] == clear_color
[3]) {
165 /* clear to grayscale value {x, x, x, x} */
166 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
168 lp_rast_get_color_tile_pointer(task
, i
, LP_TEX_USAGE_WRITE_ALL
);
169 memset(ptr
, clear_color
[0], TILE_SIZE
* TILE_SIZE
* 4);
174 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
175 * will need to change. It'll be pretty obvious when clearing no longer
178 const unsigned chunk
= TILE_SIZE
/ 4;
179 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
181 lp_rast_get_color_tile_pointer(task
, i
, LP_TEX_USAGE_WRITE_ALL
);
184 for (j
= 0; j
< 4 * TILE_SIZE
; j
++) {
185 memset(c
, clear_color
[0], chunk
);
187 memset(c
, clear_color
[1], chunk
);
189 memset(c
, clear_color
[2], chunk
);
191 memset(c
, clear_color
[3], chunk
);
197 LP_COUNT(nr_color_tile_clear
);
206 * Clear the rasterizer's current z/stencil tile.
207 * This is a bin command called during bin processing.
210 lp_rast_clear_zstencil(struct lp_rasterizer_task
*task
,
211 const union lp_rast_cmd_arg arg
)
213 const struct lp_scene
*scene
= task
->scene
;
214 uint32_t clear_value
= arg
.clear_zstencil
.value
;
215 uint32_t clear_mask
= arg
.clear_zstencil
.mask
;
216 const unsigned height
= TILE_SIZE
/ TILE_VECTOR_HEIGHT
;
217 const unsigned width
= TILE_SIZE
* TILE_VECTOR_HEIGHT
;
218 const unsigned block_size
= scene
->zsbuf
.blocksize
;
219 const unsigned dst_stride
= scene
->zsbuf
.stride
* TILE_VECTOR_HEIGHT
;
223 LP_DBG(DEBUG_RAST
, "%s: value=0x%08x, mask=0x%08x\n",
224 __FUNCTION__
, clear_value
, clear_mask
);
227 * Clear the area of the swizzled depth/stencil buffer matching this tile, in
228 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
230 * The swizzled depth format is such that the depths for
231 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
234 dst
= task
->depth_tile
;
236 clear_value
&= clear_mask
;
238 switch (block_size
) {
240 assert(clear_mask
== 0xff);
241 memset(dst
, (uint8_t) clear_value
, height
* width
);
244 if (clear_mask
== 0xffff) {
245 for (i
= 0; i
< height
; i
++) {
246 uint16_t *row
= (uint16_t *)dst
;
247 for (j
= 0; j
< width
; j
++)
248 *row
++ = (uint16_t) clear_value
;
253 for (i
= 0; i
< height
; i
++) {
254 uint16_t *row
= (uint16_t *)dst
;
255 for (j
= 0; j
< width
; j
++) {
256 uint16_t tmp
= ~clear_mask
& *row
;
257 *row
++ = clear_value
| tmp
;
264 if (clear_mask
== 0xffffffff) {
265 for (i
= 0; i
< height
; i
++) {
266 uint32_t *row
= (uint32_t *)dst
;
267 for (j
= 0; j
< width
; j
++)
268 *row
++ = clear_value
;
273 for (i
= 0; i
< height
; i
++) {
274 uint32_t *row
= (uint32_t *)dst
;
275 for (j
= 0; j
< width
; j
++) {
276 uint32_t tmp
= ~clear_mask
& *row
;
277 *row
++ = clear_value
| tmp
;
293 * Convert the color tile from tiled to linear layout.
294 * This is generally only done when we're flushing the scene just prior to
295 * SwapBuffers. If we didn't do this here, we'd have to convert the entire
296 * tiled color buffer to linear layout in the llvmpipe_texture_unmap()
297 * function. It's better to do it here to take advantage of
298 * threading/parallelism.
299 * This is a bin command which is stored in all bins.
302 lp_rast_store_linear_color( struct lp_rasterizer_task
*task
)
304 const struct lp_scene
*scene
= task
->scene
;
307 for (buf
= 0; buf
< scene
->fb
.nr_cbufs
; buf
++) {
308 struct pipe_surface
*cbuf
= scene
->fb
.cbufs
[buf
];
309 const unsigned face_slice
= cbuf
->face
+ cbuf
->zslice
;
310 const unsigned level
= cbuf
->level
;
311 struct llvmpipe_resource
*lpt
= llvmpipe_resource(cbuf
->texture
);
313 if (!task
->color_tiles
[buf
])
316 llvmpipe_unswizzle_cbuf_tile(lpt
,
320 task
->color_tiles
[buf
]);
327 * Run the shader on all blocks in a tile. This is used when a tile is
328 * completely contained inside a triangle.
329 * This is a bin command called during bin processing.
332 lp_rast_shade_tile(struct lp_rasterizer_task
*task
,
333 const union lp_rast_cmd_arg arg
)
335 const struct lp_scene
*scene
= task
->scene
;
336 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
337 const struct lp_rast_state
*state
= inputs
->state
;
338 struct lp_fragment_shader_variant
*variant
= state
->variant
;
339 const unsigned tile_x
= task
->x
, tile_y
= task
->y
;
342 if (inputs
->disable
) {
343 /* This command was partially binned and has been disabled */
347 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
349 /* render the whole 64x64 tile in 4x4 chunks */
350 for (y
= 0; y
< TILE_SIZE
; y
+= 4){
351 for (x
= 0; x
< TILE_SIZE
; x
+= 4) {
352 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
357 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++)
358 color
[i
] = lp_rast_get_color_block_pointer(task
, i
,
359 tile_x
+ x
, tile_y
+ y
);
362 depth
= lp_rast_get_depth_block_pointer(task
, tile_x
+ x
, tile_y
+ y
);
364 /* run shader on 4x4 block */
365 BEGIN_JIT_CALL(state
);
366 variant
->jit_function
[RAST_WHOLE
]( &state
->jit_context
,
367 tile_x
+ x
, tile_y
+ y
,
383 * Run the shader on all blocks in a tile. This is used when a tile is
384 * completely contained inside a triangle, and the shader is opaque.
385 * This is a bin command called during bin processing.
388 lp_rast_shade_tile_opaque(struct lp_rasterizer_task
*task
,
389 const union lp_rast_cmd_arg arg
)
391 const struct lp_scene
*scene
= task
->scene
;
394 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
396 /* this will prevent converting the layout from tiled to linear */
397 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
398 (void)lp_rast_get_color_tile_pointer(task
, i
, LP_TEX_USAGE_WRITE_ALL
);
401 lp_rast_shade_tile(task
, arg
);
406 * Compute shading for a 4x4 block of pixels inside a triangle.
407 * This is a bin command called during bin processing.
408 * \param x X position of quad in window coords
409 * \param y Y position of quad in window coords
412 lp_rast_shade_quads_mask(struct lp_rasterizer_task
*task
,
413 const struct lp_rast_shader_inputs
*inputs
,
414 unsigned x
, unsigned y
,
417 const struct lp_rast_state
*state
= inputs
->state
;
418 struct lp_fragment_shader_variant
*variant
= state
->variant
;
419 const struct lp_scene
*scene
= task
->scene
;
420 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
427 assert(x
% TILE_VECTOR_WIDTH
== 0);
428 assert(y
% TILE_VECTOR_HEIGHT
== 0);
430 assert((x
% 4) == 0);
431 assert((y
% 4) == 0);
434 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
435 color
[i
] = lp_rast_get_color_block_pointer(task
, i
, x
, y
);
436 assert(lp_check_alignment(color
[i
], 16));
440 depth
= lp_rast_get_depth_block_pointer(task
, x
, y
);
443 assert(lp_check_alignment(state
->jit_context
.blend_color
, 16));
445 /* run shader on 4x4 block */
446 BEGIN_JIT_CALL(state
);
447 variant
->jit_function
[RAST_EDGE_TEST
](&state
->jit_context
,
463 * Begin a new occlusion query.
464 * This is a bin command put in all bins.
468 lp_rast_begin_query(struct lp_rasterizer_task
*task
,
469 const union lp_rast_cmd_arg arg
)
471 struct llvmpipe_query
*pq
= arg
.query_obj
;
473 assert(task
->query
== NULL
);
474 task
->vis_counter
= 0;
480 * End the current occlusion query.
481 * This is a bin command put in all bins.
485 lp_rast_end_query(struct lp_rasterizer_task
*task
,
486 const union lp_rast_cmd_arg arg
)
488 task
->query
->count
[task
->thread_index
] += task
->vis_counter
;
495 * Set top row and left column of the tile's pixels to white. For debugging.
498 outline_tile(uint8_t *tile
)
500 const uint8_t val
= 0xff;
503 for (i
= 0; i
< TILE_SIZE
; i
++) {
504 TILE_PIXEL(tile
, i
, 0, 0) = val
;
505 TILE_PIXEL(tile
, i
, 0, 1) = val
;
506 TILE_PIXEL(tile
, i
, 0, 2) = val
;
507 TILE_PIXEL(tile
, i
, 0, 3) = val
;
509 TILE_PIXEL(tile
, 0, i
, 0) = val
;
510 TILE_PIXEL(tile
, 0, i
, 1) = val
;
511 TILE_PIXEL(tile
, 0, i
, 2) = val
;
512 TILE_PIXEL(tile
, 0, i
, 3) = val
;
518 * Draw grid of gray lines at 16-pixel intervals across the tile to
519 * show the sub-tile boundaries. For debugging.
522 outline_subtiles(uint8_t *tile
)
524 const uint8_t val
= 0x80;
525 const unsigned step
= 16;
528 for (i
= 0; i
< TILE_SIZE
; i
+= step
) {
529 for (j
= 0; j
< TILE_SIZE
; j
++) {
530 TILE_PIXEL(tile
, i
, j
, 0) = val
;
531 TILE_PIXEL(tile
, i
, j
, 1) = val
;
532 TILE_PIXEL(tile
, i
, j
, 2) = val
;
533 TILE_PIXEL(tile
, i
, j
, 3) = val
;
535 TILE_PIXEL(tile
, j
, i
, 0) = val
;
536 TILE_PIXEL(tile
, j
, i
, 1) = val
;
537 TILE_PIXEL(tile
, j
, i
, 2) = val
;
538 TILE_PIXEL(tile
, j
, i
, 3) = val
;
548 * Called when we're done writing to a color tile.
551 lp_rast_tile_end(struct lp_rasterizer_task
*task
)
554 if (LP_DEBUG
& (DEBUG_SHOW_SUBTILES
| DEBUG_SHOW_TILES
)) {
555 const struct lp_scene
*scene
= task
->scene
;
558 for (buf
= 0; buf
< scene
->fb
.nr_cbufs
; buf
++) {
559 uint8_t *color
= lp_rast_get_color_block_pointer(task
, buf
,
562 if (LP_DEBUG
& DEBUG_SHOW_SUBTILES
)
563 outline_subtiles(color
);
564 else if (LP_DEBUG
& DEBUG_SHOW_TILES
)
569 (void) outline_subtiles
;
572 lp_rast_store_linear_color(task
);
575 union lp_rast_cmd_arg dummy
= {0};
576 lp_rast_end_query(task
, dummy
);
580 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
581 task
->depth_tile
= NULL
;
586 static lp_rast_cmd_func dispatch
[LP_RAST_OP_MAX
] =
589 lp_rast_clear_zstencil
,
598 lp_rast_triangle_3_4
,
599 lp_rast_triangle_3_16
,
601 lp_rast_shade_tile_opaque
,
608 do_rasterize_bin(struct lp_rasterizer_task
*task
,
609 const struct cmd_bin
*bin
)
611 const struct cmd_block
*block
;
617 for (block
= bin
->head
; block
; block
= block
->next
) {
618 for (k
= 0; k
< block
->count
; k
++) {
619 dispatch
[block
->cmd
[k
]]( task
, block
->arg
[k
] );
627 * Rasterize commands for a single bin.
628 * \param bin  the bin to rasterize; the tile position is taken from the bin itself
629 * Must be called between lp_rast_begin() and lp_rast_end().
633 rasterize_bin(struct lp_rasterizer_task
*task
,
634 const struct cmd_bin
*bin
)
636 lp_rast_tile_begin( task
, bin
);
638 do_rasterize_bin(task
, bin
);
640 lp_rast_tile_end(task
);
645 if (bin
->head
->count
== 1) {
646 if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE_OPAQUE
)
647 LP_COUNT(nr_pure_shade_opaque_64
);
648 else if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE
)
649 LP_COUNT(nr_pure_shade_64
);
654 /* An empty bin is one that just loads the contents of the tile and
655 * stores them again unchanged. This typically happens when bins have
656 * been flushed for some reason in the middle of a frame, or when
657 * incremental updates are being made to a render target.
659 * Try to avoid doing pointless work in this case.
662 is_empty_bin( const struct cmd_bin
*bin
)
664 return bin
->head
== NULL
;
669 * Rasterize/execute all bins within a scene.
673 rasterize_scene(struct lp_rasterizer_task
*task
,
674 struct lp_scene
*scene
)
677 /* loop over scene bins, rasterize each */
681 for (i
= 0; i
< scene
->tiles_x
; i
++) {
682 for (j
= 0; j
< scene
->tiles_y
; j
++) {
683 struct cmd_bin
*bin
= lp_scene_get_bin(scene
, i
, j
);
684 rasterize_bin(task
, bin
, i
, j
);
693 while ((bin
= lp_scene_bin_iter_next(scene
))) {
694 if (!is_empty_bin( bin
))
695 rasterize_bin(task
, bin
);
701 lp_fence_signal(scene
->fence
);
709 * Called by setup module when it has something for us to render.
712 lp_rast_queue_scene( struct lp_rasterizer
*rast
,
713 struct lp_scene
*scene
)
715 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
717 if (rast
->num_threads
== 0) {
720 lp_rast_begin( rast
, scene
);
722 rasterize_scene( &rast
->tasks
[0], scene
);
726 rast
->curr_scene
= NULL
;
729 /* threaded rendering! */
732 lp_scene_enqueue( rast
->full_scenes
, scene
);
734 /* signal the threads that there's work to do */
735 for (i
= 0; i
< rast
->num_threads
; i
++) {
736 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
740 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
745 lp_rast_finish( struct lp_rasterizer
*rast
)
747 if (rast
->num_threads
== 0) {
753 /* wait for work to complete */
754 for (i
= 0; i
< rast
->num_threads
; i
++) {
755 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
762 * This is the thread's main entrypoint.
763 * It's a simple loop:
766 * 3. signal that we're done
768 static PIPE_THREAD_ROUTINE( thread_func
, init_data
)
770 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
771 struct lp_rasterizer
*rast
= task
->rast
;
772 boolean debug
= false;
777 debug_printf("thread %d waiting for work\n", task
->thread_index
);
778 pipe_semaphore_wait(&task
->work_ready
);
783 if (task
->thread_index
== 0) {
785 * - get next scene to rasterize
786 * - map the framebuffer surfaces
789 lp_scene_dequeue( rast
->full_scenes
, TRUE
) );
792 /* Wait for all threads to get here so that threads[1+] don't
793 * get a null rast->curr_scene pointer.
795 pipe_barrier_wait( &rast
->barrier
);
799 debug_printf("thread %d doing work\n", task
->thread_index
);
801 rasterize_scene(task
,
804 /* wait for all threads to finish with this scene */
805 pipe_barrier_wait( &rast
->barrier
);
807 /* XXX: shouldn't be necessary:
809 if (task
->thread_index
== 0) {
813 /* signal done with work */
815 debug_printf("thread %d done working\n", task
->thread_index
);
817 pipe_semaphore_signal(&task
->work_done
);
825 * Initialize semaphores and spawn the threads.
828 create_rast_threads(struct lp_rasterizer
*rast
)
832 /* NOTE: if num_threads is zero, we won't use any threads */
833 for (i
= 0; i
< rast
->num_threads
; i
++) {
834 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
835 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
836 rast
->threads
[i
] = pipe_thread_create(thread_func
,
837 (void *) &rast
->tasks
[i
]);
844 * Create new lp_rasterizer. If num_threads is zero, don't create any
845 * new threads, do rendering synchronously.
846 * \param num_threads number of rasterizer threads to create
848 struct lp_rasterizer
*
849 lp_rast_create( unsigned num_threads
)
851 struct lp_rasterizer
*rast
;
854 rast
= CALLOC_STRUCT(lp_rasterizer
);
858 rast
->full_scenes
= lp_scene_queue_create();
860 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
861 struct lp_rasterizer_task
*task
= &rast
->tasks
[i
];
863 task
->thread_index
= i
;
866 rast
->num_threads
= num_threads
;
868 create_rast_threads(rast
);
870 /* for synchronizing rasterization threads */
871 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
873 memset(lp_swizzled_cbuf
, 0, sizeof lp_swizzled_cbuf
);
875 memset(lp_dummy_tile
, 0, sizeof lp_dummy_tile
);
883 void lp_rast_destroy( struct lp_rasterizer
*rast
)
887 /* Set exit_flag and signal each thread's work_ready semaphore.
888 * Each thread will be woken up, notice that the exit_flag is set and
889 * break out of its main loop. The thread will then exit.
891 rast
->exit_flag
= TRUE
;
892 for (i
= 0; i
< rast
->num_threads
; i
++) {
893 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
896 /* Wait for threads to terminate before cleaning up per-thread data */
897 for (i
= 0; i
< rast
->num_threads
; i
++) {
898 pipe_thread_wait(rast
->threads
[i
]);
901 /* Clean up per-thread data */
902 for (i
= 0; i
< rast
->num_threads
; i
++) {
903 pipe_semaphore_destroy(&rast
->tasks
[i
].work_ready
);
904 pipe_semaphore_destroy(&rast
->tasks
[i
].work_done
);
907 /* for synchronizing rasterization threads */
908 pipe_barrier_destroy( &rast
->barrier
);
910 lp_scene_queue_destroy(rast
->full_scenes
);
916 /** Return number of rasterization threads */
918 lp_rast_get_num_threads( struct lp_rasterizer
*rast
)
920 return rast
->num_threads
;