1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
35 #include "os/os_time.h"
37 #include "lp_scene_queue.h"
43 #include "lp_rast_priv.h"
44 #include "gallivm/lp_bld_debug.h"
46 #include "lp_tex_sample.h"
/*
 * File-scope pointers to a rasterizer state and a rasterizer task, both
 * initialised to NULL.
 * NOTE(review): nothing visible in this section assigns them; presumably
 * they are updated by the BEGIN_JIT_CALL() macro for debugging -- confirm.
 */
51 const struct lp_rast_state
*jit_state
= NULL
;
52 const struct lp_rasterizer_task
*jit_task
= NULL
;
57 * Begin rasterizing a scene.
58 * Called once per scene by one thread.
/*
 * Make 'scene' the rasterizer's current scene (rast->curr_scene), then call
 * lp_scene_begin_rasterization() and lp_scene_bin_iter_begin() on it.
 *
 * \param rast   rasterizer whose curr_scene is updated
 * \param scene  scene about to be rasterized
 */
61 lp_rast_begin( struct lp_rasterizer
*rast
,
62 struct lp_scene
*scene
)
64 rast
->curr_scene
= scene
;
66 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
68 lp_scene_begin_rasterization( scene
);
69 lp_scene_bin_iter_begin( scene
);
/*
 * Counterpart of lp_rast_begin(): calls lp_scene_end_rasterization() on the
 * current scene and then clears rast->curr_scene.
 */
74 lp_rast_end( struct lp_rasterizer
*rast
)
76 lp_scene_end_rasterization( rast
->curr_scene
);
78 rast
->curr_scene
= NULL
;
83 * Beginning rasterization of a tile.
84 * \param x X index of the tile (multiplied by TILE_SIZE to get the window X position in pixels)
85 * \param y Y index of the tile (multiplied by TILE_SIZE to get the window Y position in pixels)
/*
 * Prepare 'task' for rasterizing one tile.
 *
 * x and y are used as tile indices here: they are multiplied by TILE_SIZE to
 * obtain the pixel origin stored in task->x / task->y.  The per-thread
 * visibility counter, the pixel-shader invocation counter and the cached
 * colour/depth tile pointers are all reset.
 *
 * NOTE(review): the declarations of the x and y parameters are not visible
 * in this listing.
 */
88 lp_rast_tile_begin(struct lp_rasterizer_task
*task
,
89 const struct cmd_bin
*bin
,
92 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, x
, y
);
95 task
->x
= x
* TILE_SIZE
;
96 task
->y
= y
* TILE_SIZE
;
/*
 * Clip the tile extent against the framebuffer: a tile that would extend
 * past the right/bottom edge only covers the remaining pixels, otherwise it
 * is a full TILE_SIZE wide/high.
 */
97 task
->width
= TILE_SIZE
+ x
* TILE_SIZE
> task
->scene
->fb
.width
?
98 task
->scene
->fb
.width
- x
* TILE_SIZE
: TILE_SIZE
;
99 task
->height
= TILE_SIZE
+ y
* TILE_SIZE
> task
->scene
->fb
.height
?
100 task
->scene
->fb
.height
- y
* TILE_SIZE
: TILE_SIZE
;
/* per-tile counters start from zero */
102 task
->thread_data
.vis_counter
= 0;
103 task
->ps_invocations
= 0;
105 /* reset pointers to color and depth tile(s) */
106 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
107 task
->depth_tile
= NULL
;
112 * Clear the rasterizer's current color tile.
113 * This is a bin command called during bin processing.
114 * Clear commands always clear all bound layers.
/*
 * Bin command: fill the colour buffers of the task's scene with the clear
 * colour carried in 'arg'.
 *
 * Nothing is cleared unless scene->fb.nr_cbufs is non-zero.  Two paths
 * exist: a pure-integer path that writes the int/uint clear value with
 * util_format_write_4i()/util_format_write_4ui(), and a float path that
 * packs the value with util_pack_color().  In both paths each colour buffer
 * is then filled with util_fill_box() over scene->fb_max_layer + 1 layers.
 *
 * NOTE(review): several argument lines of the util_fill_box() and LP_DBG()
 * calls are not visible in this listing.
 */
117 lp_rast_clear_color(struct lp_rasterizer_task
*task
,
118 const union lp_rast_cmd_arg arg
)
120 const struct lp_scene
*scene
= task
->scene
;
122 if (scene
->fb
.nr_cbufs
) {
/*
 * The integer-vs-float decision is taken from the format of colour
 * buffer 0 only; the per-buffer format is consulted again inside the loop.
 */
126 if (util_format_is_pure_integer(scene
->fb
.cbufs
[0]->format
)) {
128 * We expect int/uint clear values here, though some APIs
129 * might disagree (but in any case util_pack_color()
130 * couldn't handle it)...
132 LP_DBG(DEBUG_RAST
, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
133 arg
.clear_color
.ui
[0],
134 arg
.clear_color
.ui
[1],
135 arg
.clear_color
.ui
[2],
136 arg
.clear_color
.ui
[3]);
138 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
139 enum pipe_format format
= scene
->fb
.cbufs
[i
]->format
;
/* encode a single 1x1 pixel of the clear value into 'uc' */
141 if (util_format_is_pure_sint(format
)) {
142 util_format_write_4i(format
, arg
.clear_color
.i
, 0, &uc
, 0, 0, 0, 1, 1);
/* a pure-integer format that is not signed must be unsigned */
145 assert(util_format_is_pure_uint(format
));
146 util_format_write_4ui(format
, arg
.clear_color
.ui
, 0, &uc
, 0, 0, 0, 1, 1);
149 util_fill_box(scene
->cbufs
[i
].map
,
151 scene
->cbufs
[i
].stride
,
152 scene
->cbufs
[i
].layer_stride
,
158 scene
->fb_max_layer
+ 1,
/*
 * Float clear path.  clear_color[] holds the four channels converted to
 * unsigned bytes; NOTE(review): presumably only used by the debug message
 * below -- its uses are not visible here.
 */
163 uint8_t clear_color
[4];
165 for (i
= 0; i
< 4; ++i
) {
166 clear_color
[i
] = float_to_ubyte(arg
.clear_color
.f
[i
]);
169 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
175 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
/* pack the float clear colour for this buffer's own format */
176 util_pack_color(arg
.clear_color
.f
,
177 scene
->fb
.cbufs
[i
]->format
, &uc
);
179 util_fill_box(scene
->cbufs
[i
].map
,
180 scene
->fb
.cbufs
[i
]->format
,
181 scene
->cbufs
[i
].stride
,
182 scene
->cbufs
[i
].layer_stride
,
188 scene
->fb_max_layer
+ 1,
/* performance counter for colour tile clears */
194 LP_COUNT(nr_color_tile_clear
);
201 * Clear the rasterizer's current z/stencil tile.
202 * This is a bin command called during bin processing.
203 * Clear commands always clear all bound layers.
/*
 * Bin command: clear the depth/stencil buffer area covered by the task's
 * tile, for every layer from 0 to scene->fb_max_layer inclusive.
 *
 * 'arg.clear_zstencil' supplies a 64-bit value and a 64-bit mask; 32-bit
 * truncations of both are kept for the narrower element sizes.  Only bits
 * set in the mask are replaced: when the mask covers the whole element the
 * value is stored directly, otherwise each element is read, the masked bits
 * are cleared and the (pre-masked) value is OR-ed in.
 *
 * The switch on block_size writes elements as uint8_t, uint16_t, uint32_t
 * or uint64_t.
 * NOTE(review): the case labels, the 'dst' setup and the per-row pointer
 * advance are not visible in this listing.
 */
206 lp_rast_clear_zstencil(struct lp_rasterizer_task
*task
,
207 const union lp_rast_cmd_arg arg
)
209 const struct lp_scene
*scene
= task
->scene
;
210 uint64_t clear_value64
= arg
.clear_zstencil
.value
;
211 uint64_t clear_mask64
= arg
.clear_zstencil
.mask
;
/* low 32 bits of value/mask, used by the 8/16/32-bit element paths */
212 uint32_t clear_value
= (uint32_t) clear_value64
;
213 uint32_t clear_mask
= (uint32_t) clear_mask64
;
/* tile extent as set up by lp_rast_tile_begin() */
214 const unsigned height
= task
->height
;
215 const unsigned width
= task
->width
;
216 const unsigned dst_stride
= scene
->zsbuf
.stride
;
221 LP_DBG(DEBUG_RAST
, "%s: value=0x%08x, mask=0x%08x\n",
222 __FUNCTION__
, clear_value
, clear_mask
);
225 * Clear the area of the depth/depth buffer matching this tile.
/* nothing to do when no depth/stencil surface is bound */
228 if (scene
->fb
.zsbuf
) {
230 uint8_t *dst_layer
= lp_rast_get_unswizzled_depth_tile_pointer(task
, LP_TEX_USAGE_READ_WRITE
);
/* bytes per depth/stencil element; selects the store width below */
231 block_size
= util_format_get_blocksize(scene
->fb
.zsbuf
->format
);
/* pre-mask the value so the masked paths can simply OR it in */
233 clear_value
&= clear_mask
;
235 for (layer
= 0; layer
<= scene
->fb_max_layer
; layer
++) {
238 switch (block_size
) {
/*
 * 8-bit elements: only a full mask is supported.
 * NOTE(review): this memset fills height * width contiguous bytes and
 * does not use dst_stride -- confirm that is correct for tiles whose row
 * stride differs from 'width'.
 */
240 assert(clear_mask
== 0xff);
241 memset(dst
, (uint8_t) clear_value
, height
* width
);
/* 16-bit elements: full-mask store, else read-modify-write */
244 if (clear_mask
== 0xffff) {
245 for (i
= 0; i
< height
; i
++) {
246 uint16_t *row
= (uint16_t *)dst
;
247 for (j
= 0; j
< width
; j
++)
248 *row
++ = (uint16_t) clear_value
;
253 for (i
= 0; i
< height
; i
++) {
254 uint16_t *row
= (uint16_t *)dst
;
255 for (j
= 0; j
< width
; j
++) {
/* keep the bits outside the mask, replace the rest */
256 uint16_t tmp
= ~clear_mask
& *row
;
257 *row
++ = clear_value
| tmp
;
/* 32-bit elements: full-mask store, else read-modify-write */
264 if (clear_mask
== 0xffffffff) {
265 for (i
= 0; i
< height
; i
++) {
266 uint32_t *row
= (uint32_t *)dst
;
267 for (j
= 0; j
< width
; j
++)
268 *row
++ = clear_value
;
273 for (i
= 0; i
< height
; i
++) {
274 uint32_t *row
= (uint32_t *)dst
;
275 for (j
= 0; j
< width
; j
++) {
276 uint32_t tmp
= ~clear_mask
& *row
;
277 *row
++ = clear_value
| tmp
;
/*
 * 64-bit elements use the untruncated value/mask.  The "full" mask tested
 * here has ten hex digits, i.e. the low 40 bits.
 * NOTE(review): presumably 32 depth bits plus 8 stencil bits -- confirm
 * against the format being cleared.
 */
284 clear_value64
&= clear_mask64
;
285 if (clear_mask64
== 0xffffffffffULL
) {
286 for (i
= 0; i
< height
; i
++) {
287 uint64_t *row
= (uint64_t *)dst
;
288 for (j
= 0; j
< width
; j
++)
289 *row
++ = clear_value64
;
294 for (i
= 0; i
< height
; i
++) {
295 uint64_t *row
= (uint64_t *)dst
;
296 for (j
= 0; j
< width
; j
++) {
297 uint64_t tmp
= ~clear_mask64
& *row
;
298 *row
++ = clear_value64
| tmp
;
/* step to the same tile in the next layer */
309 dst_layer
+= scene
->zsbuf
.layer_stride
;
317 * Run the shader on all blocks in a tile. This is used when a tile is
318 * completely contained inside a triangle.
319 * This is a bin command called during bin processing.
/*
 * Bin command: run the fragment shader variant's RAST_WHOLE entry point on
 * every 4x4 block of the task's tile.
 *
 * The shader inputs come from arg.shade_tile.  Inputs flagged 'disable' are
 * checked first (the handling of that case is not visible in this listing).
 * For each block the colour block pointers/strides of all bound colour
 * buffers are gathered, plus the depth block pointer/stride when a depth
 * buffer is mapped.
 *
 * NOTE(review): the assignment of 'state' and the remaining arguments of
 * the JIT call are not visible in this listing.
 */
322 lp_rast_shade_tile(struct lp_rasterizer_task
*task
,
323 const union lp_rast_cmd_arg arg
)
325 const struct lp_scene
*scene
= task
->scene
;
326 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
327 const struct lp_rast_state
*state
;
328 struct lp_fragment_shader_variant
*variant
;
/* pixel origin of the tile, set by lp_rast_tile_begin() */
329 const unsigned tile_x
= task
->x
, tile_y
= task
->y
;
332 if (inputs
->disable
) {
333 /* This command was partially binned and has been disabled */
337 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
344 variant
= state
->variant
;
346 /* render the whole 64x64 tile in 4x4 chunks */
/*
 * The loop bounds are task->height / task->width, i.e. the tile extent
 * already clipped to the framebuffer, stepping 4 pixels at a time.
 */
347 for (y
= 0; y
< task
->height
; y
+= 4){
348 for (x
= 0; x
< task
->width
; x
+= 4) {
349 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
350 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
/* depth stays NULL / stride 0 when no depth buffer is mapped */
351 uint8_t *depth
= NULL
;
352 unsigned depth_stride
= 0;
/* collect the destination block pointer and stride of each colour buffer */
356 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++){
357 stride
[i
] = scene
->cbufs
[i
].stride
;
358 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, tile_x
+ x
,
359 tile_y
+ y
, inputs
->layer
);
363 if (scene
->zsbuf
.map
) {
364 depth
= lp_rast_get_unswizzled_depth_block_pointer(task
, tile_x
+ x
,
365 tile_y
+ y
, inputs
->layer
);
366 depth_stride
= scene
->zsbuf
.stride
;
369 /* run shader on 4x4 block */
370 BEGIN_JIT_CALL(state
, task
);
371 variant
->jit_function
[RAST_WHOLE
]( &state
->jit_context
,
372 tile_x
+ x
, tile_y
+ y
,
390 * Run the shader on all blocks in a tile. This is used when a tile is
391 * completely contained inside a triangle, and the shader is opaque.
392 * This is a bin command called during bin processing.
/*
 * Bin command for fully covered tiles with an opaque shader: logs the call
 * and forwards 'task' and 'arg' unchanged to lp_rast_shade_tile().
 * NOTE(review): some lines between the debug message and the forwarded call
 * are not visible in this listing.
 */
395 lp_rast_shade_tile_opaque(struct lp_rasterizer_task
*task
,
396 const union lp_rast_cmd_arg arg
)
398 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
405 lp_rast_shade_tile(task
, arg
);
410 * Compute shading for a 4x4 block of pixels inside a triangle.
411 * This is a bin command called during bin processing.
412 * \param x X position of quad in window coords
413 * \param y Y position of quad in window coords
/*
 * Shade one 4x4 block at window position (x, y) using the RAST_EDGE_TEST
 * entry point of the current state's shader variant.
 *
 * The state is taken from task->state.  x and y must lie inside the scene's
 * tile grid and be aligned to the tile vector size and to 4 pixels (checked
 * by the asserts).  Blocks whose position inside the tile falls outside the
 * clipped tile extent (task->width / task->height) are skipped.
 *
 * NOTE(review): the remaining parameters and the remaining arguments of the
 * JIT call are not visible in this listing.
 */
416 lp_rast_shade_quads_mask(struct lp_rasterizer_task
*task
,
417 const struct lp_rast_shader_inputs
*inputs
,
418 unsigned x
, unsigned y
,
421 const struct lp_rast_state
*state
= task
->state
;
422 struct lp_fragment_shader_variant
*variant
= state
->variant
;
423 const struct lp_scene
*scene
= task
->scene
;
424 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
425 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
/* depth stays NULL / stride 0 when no depth buffer is mapped */
426 uint8_t *depth
= NULL
;
427 unsigned depth_stride
= 0;
/* sanity checks: position is inside the tile grid and block-aligned */
433 assert(x
< scene
->tiles_x
* TILE_SIZE
);
434 assert(y
< scene
->tiles_y
* TILE_SIZE
);
435 assert(x
% TILE_VECTOR_WIDTH
== 0);
436 assert(y
% TILE_VECTOR_HEIGHT
== 0);
438 assert((x
% 4) == 0);
439 assert((y
% 4) == 0);
/* collect the destination block pointer and stride of each colour buffer */
442 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
443 stride
[i
] = scene
->cbufs
[i
].stride
;
444 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, x
, y
, inputs
->layer
);
448 if (scene
->zsbuf
.map
) {
449 depth_stride
= scene
->zsbuf
.stride
;
450 depth
= lp_rast_get_unswizzled_depth_block_pointer(task
, x
, y
, inputs
->layer
);
/* the blend colour must be 16-byte aligned */
453 assert(lp_check_alignment(state
->jit_context
.u8_blend_color
, 16));
456 * The rasterizer may produce fragments outside our
457 * allocated 4x4 blocks hence need to filter them out here.
459 if ((x
% TILE_SIZE
) < task
->width
&& (y
% TILE_SIZE
) < task
->height
) {
460 /* not very accurate would need a popcount on the mask */
461 /* always count this not worth bothering? */
/* counted once per shaded block, regardless of how many pixels are live */
462 task
->ps_invocations
++;
464 /* run shader on 4x4 block */
465 BEGIN_JIT_CALL(state
, task
);
466 variant
->jit_function
[RAST_EDGE_TEST
](&state
->jit_context
,
485 * Begin a new occlusion query.
486 * This is a bin command put in all bins.
/*
 * Bin command: start the query object carried in arg.query_obj for this
 * thread by recording the current counter value in pq->start[thread_index].
 *
 * Occlusion counter/predicate queries snapshot the visibility counter;
 * pipeline-statistics queries snapshot the pixel-shader invocation counter.
 * lp_rast_end_query() later subtracts this snapshot.
 *
 * NOTE(review): the switch selector and any other cases are not visible in
 * this listing.
 */
490 lp_rast_begin_query(struct lp_rasterizer_task
*task
,
491 const union lp_rast_cmd_arg arg
)
493 struct llvmpipe_query
*pq
= arg
.query_obj
;
496 case PIPE_QUERY_OCCLUSION_COUNTER
:
497 case PIPE_QUERY_OCCLUSION_PREDICATE
:
498 pq
->start
[task
->thread_index
] = task
->thread_data
.vis_counter
;
500 case PIPE_QUERY_PIPELINE_STATISTICS
:
501 pq
->start
[task
->thread_index
] = task
->ps_invocations
;
511 * End the current occlusion query.
512 * This is a bin command put in all bins.
/*
 * Bin command: finish the query object carried in arg.query_obj for this
 * thread.
 *
 * For occlusion and pipeline-statistics queries the difference between the
 * current counter and the value saved in pq->start[thread_index] is added
 * to pq->end[thread_index], and the start slot is reset to 0.  Timestamp
 * queries store the current time from os_time_get_nano() instead.
 *
 * NOTE(review): the switch selector and any other cases are not visible in
 * this listing.
 */
516 lp_rast_end_query(struct lp_rasterizer_task
*task
,
517 const union lp_rast_cmd_arg arg
)
519 struct llvmpipe_query
*pq
= arg
.query_obj
;
522 case PIPE_QUERY_OCCLUSION_COUNTER
:
523 case PIPE_QUERY_OCCLUSION_PREDICATE
:
/* accumulate, since the same query may be ended once per tile */
524 pq
->end
[task
->thread_index
] +=
525 task
->thread_data
.vis_counter
- pq
->start
[task
->thread_index
];
526 pq
->start
[task
->thread_index
] = 0;
528 case PIPE_QUERY_TIMESTAMP
:
/* plain assignment: a later call overwrites the earlier timestamp */
529 pq
->end
[task
->thread_index
] = os_time_get_nano();
531 case PIPE_QUERY_PIPELINE_STATISTICS
:
532 pq
->end
[task
->thread_index
] +=
533 task
->ps_invocations
- pq
->start
[task
->thread_index
];
534 pq
->start
[task
->thread_index
] = 0;
/*
 * Bin command: make arg.state the task's current rasterizer state.
 * lp_rast_shade_quads_mask() reads it back from task->state.
 */
544 lp_rast_set_state(struct lp_rasterizer_task
*task
,
545 const union lp_rast_cmd_arg arg
)
547 task
->state
= arg
.state
;
553 * Called when we're done writing to a color tile.
/*
 * Finish the current tile: call lp_rast_end_query() for every entry of the
 * scene's active_queries array (so per-tile counter deltas are accumulated),
 * then drop the cached colour and depth tile pointers.
 */
556 lp_rast_tile_end(struct lp_rasterizer_task
*task
)
560 for (i
= 0; i
< task
->scene
->num_active_queries
; ++i
) {
561 lp_rast_end_query(task
, lp_rast_arg_query(task
->scene
->active_queries
[i
]));
/* same reset as at the end of lp_rast_tile_begin() */
565 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
566 task
->depth_tile
= NULL
;
/*
 * Table of bin command handlers with LP_RAST_OP_MAX slots.
 * do_rasterize_bin() indexes it with the command code stored in each
 * command block.
 * NOTE(review): the entry order presumably has to match the LP_RAST_OP_*
 * numbering, and most entries are not visible in this listing -- confirm
 * against the opcode definitions.
 */
571 static lp_rast_cmd_func dispatch
[LP_RAST_OP_MAX
] =
574 lp_rast_clear_zstencil
,
583 lp_rast_triangle_3_4
,
584 lp_rast_triangle_3_16
,
585 lp_rast_triangle_4_16
,
587 lp_rast_shade_tile_opaque
,
/*
 * Execute every command stored in 'bin'.
 *
 * The bin is a linked list of command blocks starting at bin->head; each
 * block holds 'count' commands.  Command k is run by looking up
 * block->cmd[k] in the dispatch table and calling the handler with the task
 * and block->arg[k].
 *
 * NOTE(review): the x and y parameter declarations are not visible in this
 * listing; here they are only passed to lp_debug_bin().
 */
595 do_rasterize_bin(struct lp_rasterizer_task
*task
,
596 const struct cmd_bin
*bin
,
599 const struct cmd_block
*block
;
603 lp_debug_bin(bin
, x
, y
);
605 for (block
= bin
->head
; block
; block
= block
->next
) {
606 for (k
= 0; k
< block
->count
; k
++) {
607 dispatch
[block
->cmd
[k
]]( task
, block
->arg
[k
] );
615 * Rasterize commands for a single bin.
616 * \param x, y position of the bin's tile in the framebuffer
617 * Must be called between lp_rast_begin() and lp_rast_end().
/*
 * Rasterize one bin: set up the tile with lp_rast_tile_begin(), run the
 * bin's commands with do_rasterize_bin(), then close the tile with
 * lp_rast_tile_end().
 *
 * Afterwards statistics are updated: when the first command block holds
 * exactly one command and it is a whole-tile shade (opaque or not), the
 * matching counter is bumped.
 *
 * bin->head is dereferenced without a NULL check, so 'bin' must not be
 * empty; rasterize_scene() filters empty bins with is_empty_bin() first.
 */
621 rasterize_bin(struct lp_rasterizer_task
*task
,
622 const struct cmd_bin
*bin
, int x
, int y
)
624 lp_rast_tile_begin( task
, bin
, x
, y
);
626 do_rasterize_bin(task
, bin
, x
, y
);
628 lp_rast_tile_end(task
);
633 if (bin
->head
->count
== 1) {
634 if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE_OPAQUE
)
635 LP_COUNT(nr_pure_shade_opaque_64
);
636 else if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE
)
637 LP_COUNT(nr_pure_shade_64
);
642 /* An empty bin is one that just loads the contents of the tile and
643 * stores them again unchanged. This typically happens when bins have
644 * been flushed for some reason in the middle of a frame, or when
645 * incremental updates are being made to a render target.
647 * Try to avoid doing pointless work in this case.
/*
 * Return non-zero when 'bin' has no command blocks at all, i.e. its head
 * pointer is NULL.
 */
650 is_empty_bin( const struct cmd_bin
*bin
)
652 return bin
->head
== NULL
;
657 * Rasterize/execute all bins within a scene.
/*
 * Rasterize the bins of 'scene' on behalf of 'task'.
 *
 * Rasterization is skipped entirely when the rasterizer's no_rast flag is
 * set or the scene is marked 'discard'.  Otherwise bins are fetched with
 * lp_scene_bin_iter_next() until it returns NULL, and every non-empty bin
 * is passed to rasterize_bin() with the tile indices the iterator reported.
 * The scene's fence is signalled at the end.
 */
661 rasterize_scene(struct lp_rasterizer_task
*task
,
662 struct lp_scene
*scene
)
666 if (!task
->rast
->no_rast
&& !scene
->discard
) {
667 /* loop over scene bins, rasterize each */
673 while ((bin
= lp_scene_bin_iter_next(scene
, &i
, &j
))) {
/* empty bins need no work; see the comment above is_empty_bin() */
674 if (!is_empty_bin( bin
))
675 rasterize_bin(task
, bin
, i
, j
);
682 lp_fence_signal(scene
->fence
);
690 * Called by setup module when it has something for us to render.
/*
 * Hand 'scene' to the rasterizer.
 *
 * With no rasterizer threads (num_threads == 0) the scene is rendered
 * immediately on the calling thread using tasks[0]: the floating-point
 * state is saved, denormals are flushed to zero for the duration, and the
 * saved state is restored afterwards.
 *
 * Otherwise the scene is put on the full_scenes queue and every thread's
 * work_ready semaphore is signalled.
 */
693 lp_rast_queue_scene( struct lp_rasterizer
*rast
,
694 struct lp_scene
*scene
)
696 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
698 if (rast
->num_threads
== 0) {
/* remember the caller's FP state so it can be restored below */
700 unsigned fpstate
= util_fpstate_get();
702 /* Make sure that denorms are treated like zeros. This is
703 * the behavior required by D3D10. OpenGL doesn't care.
705 util_fpstate_set_denorms_to_zero(fpstate
);
707 lp_rast_begin( rast
, scene
);
709 rasterize_scene( &rast
->tasks
[0], scene
);
713 util_fpstate_set(fpstate
);
715 rast
->curr_scene
= NULL
;
718 /* threaded rendering! */
721 lp_scene_enqueue( rast
->full_scenes
, scene
);
723 /* signal the threads that there's work to do */
724 for (i
= 0; i
< rast
->num_threads
; i
++) {
725 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
729 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
/*
 * Wait until rasterization has finished.
 *
 * In threaded mode this blocks on the work_done semaphore of each
 * rasterizer thread in turn.
 * NOTE(review): the body of the num_threads == 0 branch is not visible in
 * this listing.
 */
734 lp_rast_finish( struct lp_rasterizer
*rast
)
736 if (rast
->num_threads
== 0) {
742 /* wait for work to complete */
743 for (i
= 0; i
< rast
->num_threads
; i
++) {
744 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
751 * This is the thread's main entrypoint.
752 * It's a simple loop:
755 * 3. signal that we're done
/*
 * Entry point of a rasterizer thread; 'init_data' is the thread's
 * lp_rasterizer_task.
 *
 * Visible steps: flush denormals to zero for this thread, wait on the
 * task's work_ready semaphore, let thread 0 dequeue the next scene from
 * rast->full_scenes, synchronise all threads on rast->barrier, rasterize
 * the scene, synchronise on the barrier again, and finally signal the
 * task's work_done semaphore.
 *
 * NOTE(review): the enclosing loop, the exit check and several statements
 * are not visible in this listing.
 */
757 static PIPE_THREAD_ROUTINE( thread_function
, init_data
)
759 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
760 struct lp_rasterizer
*rast
= task
->rast
;
/* NOTE(review): presumably gates the debug_printf() calls below -- confirm */
761 boolean debug
= false;
762 unsigned fpstate
= util_fpstate_get();
764 /* Make sure that denorms are treated like zeros. This is
765 * the behavior required by D3D10. OpenGL doesn't care.
767 util_fpstate_set_denorms_to_zero(fpstate
);
772 debug_printf("thread %d waiting for work\n", task
->thread_index
);
773 pipe_semaphore_wait(&task
->work_ready
);
/* only thread 0 fetches the scene; the others wait at the barrier below */
778 if (task
->thread_index
== 0) {
780 * - get next scene to rasterize
781 * - map the framebuffer surfaces
784 lp_scene_dequeue( rast
->full_scenes
, TRUE
) );
787 /* Wait for all threads to get here so that threads[1+] don't
788 * get a null rast->curr_scene pointer.
790 pipe_barrier_wait( &rast
->barrier
);
794 debug_printf("thread %d doing work\n", task
->thread_index
);
796 rasterize_scene(task
,
799 /* wait for all threads to finish with this scene */
800 pipe_barrier_wait( &rast
->barrier
);
802 /* XXX: shouldn't be necessary:
804 if (task
->thread_index
== 0) {
808 /* signal done with work */
810 debug_printf("thread %d done working\n", task
->thread_index
);
812 pipe_semaphore_signal(&task
->work_done
);
820 * Initialize semaphores and spawn the threads.
/*
 * For each of rast->num_threads tasks: initialise the work_ready and
 * work_done semaphores to 0 and start a thread running thread_function()
 * with the task as its argument.  The thread handles are stored in
 * rast->threads[].
 */
823 create_rast_threads(struct lp_rasterizer
*rast
)
827 /* NOTE: if num_threads is zero, we won't use any threads */
828 for (i
= 0; i
< rast
->num_threads
; i
++) {
829 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
830 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
831 rast
->threads
[i
] = pipe_thread_create(thread_function
,
832 (void *) &rast
->tasks
[i
]);
839 * Create new lp_rasterizer. If num_threads is zero, don't create any
840 * new threads, do rendering synchronously.
841 * \param num_threads number of rasterizer threads to create
/*
 * Allocate and initialise a rasterizer.
 *
 * Visible steps: allocate the zeroed lp_rasterizer, create the full_scenes
 * queue, give every entry of rast->tasks its thread_index, store
 * num_threads, read the LP_NO_RAST debug option into rast->no_rast, start
 * the threads, initialise the barrier for num_threads participants, and
 * zero lp_dummy_tile.
 *
 * NOTE(review): the failure handling after the allocation and queue
 * creation, and the return statement, are not visible in this listing.
 */
843 struct lp_rasterizer
*
844 lp_rast_create( unsigned num_threads
)
846 struct lp_rasterizer
*rast
;
849 rast
= CALLOC_STRUCT(lp_rasterizer
);
854 rast
->full_scenes
= lp_scene_queue_create();
855 if (!rast
->full_scenes
) {
/* every task slot is set up, not just the first num_threads */
859 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
860 struct lp_rasterizer_task
*task
= &rast
->tasks
[i
];
862 task
->thread_index
= i
;
865 rast
->num_threads
= num_threads
;
867 rast
->no_rast
= debug_get_bool_option("LP_NO_RAST", FALSE
);
869 create_rast_threads(rast
);
871 /* for synchronizing rasterization threads */
872 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
874 memset(lp_dummy_tile
, 0, sizeof lp_dummy_tile
);
887 void lp_rast_destroy( struct lp_rasterizer
*rast
)
891 /* Set exit_flag and signal each thread's work_ready semaphore.
892 * Each thread will be woken up, notice that the exit_flag is set and
893 * break out of its main loop. The thread will then exit.
895 rast
->exit_flag
= TRUE
;
896 for (i
= 0; i
< rast
->num_threads
; i
++) {
897 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
900 /* Wait for threads to terminate before cleaning up per-thread data */
901 for (i
= 0; i
< rast
->num_threads
; i
++) {
902 pipe_thread_wait(rast
->threads
[i
]);
905 /* Clean up per-thread data */
906 for (i
= 0; i
< rast
->num_threads
; i
++) {
907 pipe_semaphore_destroy(&rast
->tasks
[i
].work_ready
);
908 pipe_semaphore_destroy(&rast
->tasks
[i
].work_done
);
911 /* for synchronizing rasterization threads */
912 pipe_barrier_destroy( &rast
->barrier
);
914 lp_scene_queue_destroy(rast
->full_scenes
);