1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
35 #include "os/os_time.h"
37 #include "lp_scene_queue.h"
43 #include "lp_rast_priv.h"
44 #include "gallivm/lp_bld_debug.h"
46 #include "lp_tex_sample.h"
/*
 * File-scope pointers to a rasterizer state and a rasterizer task.  Both
 * start out NULL and nothing in this file assigns them directly.
 * NOTE(review): presumably recorded by BEGIN_JIT_CALL() so that a debugger
 * can tell which state/task was executing JIT code -- confirm in
 * lp_rast_priv.h.
 */
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
57 * Begin rasterizing a scene.
58 * Called once per scene by one thread.
61 lp_rast_begin( struct lp_rasterizer
*rast
,
62 struct lp_scene
*scene
)
65 rast
->curr_scene
= scene
;
67 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
69 lp_scene_begin_rasterization( scene
);
70 lp_scene_bin_iter_begin( scene
);
75 lp_rast_end( struct lp_rasterizer
*rast
)
77 lp_scene_end_rasterization( rast
->curr_scene
);
79 rast
->curr_scene
= NULL
;
84 * Begining rasterization of a tile.
85 * \param x window X position of the tile, in pixels
86 * \param y window Y position of the tile, in pixels
89 lp_rast_tile_begin(struct lp_rasterizer_task
*task
,
90 const struct cmd_bin
*bin
,
93 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, x
, y
);
96 task
->x
= x
* TILE_SIZE
;
97 task
->y
= y
* TILE_SIZE
;
98 task
->width
= TILE_SIZE
+ x
* TILE_SIZE
> task
->scene
->fb
.width
?
99 task
->scene
->fb
.width
- x
* TILE_SIZE
: TILE_SIZE
;
100 task
->height
= TILE_SIZE
+ y
* TILE_SIZE
> task
->scene
->fb
.height
?
101 task
->scene
->fb
.height
- y
* TILE_SIZE
: TILE_SIZE
;
103 /* reset pointers to color and depth tile(s) */
104 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
105 task
->depth_tile
= NULL
;
110 * Clear the rasterizer's current color tile.
111 * This is a bin command called during bin processing.
112 * Clear commands always clear all bound layers.
115 lp_rast_clear_color(struct lp_rasterizer_task
*task
,
116 const union lp_rast_cmd_arg arg
)
118 const struct lp_scene
*scene
= task
->scene
;
120 if (scene
->fb
.nr_cbufs
) {
124 if (util_format_is_pure_integer(scene
->fb
.cbufs
[0]->format
)) {
126 * We expect int/uint clear values here, though some APIs
127 * might disagree (but in any case util_pack_color()
128 * couldn't handle it)...
130 LP_DBG(DEBUG_RAST
, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
131 arg
.clear_color
.ui
[0],
132 arg
.clear_color
.ui
[1],
133 arg
.clear_color
.ui
[2],
134 arg
.clear_color
.ui
[3]);
136 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
137 enum pipe_format format
= scene
->fb
.cbufs
[i
]->format
;
139 if (util_format_is_pure_sint(format
)) {
140 util_format_write_4i(format
, arg
.clear_color
.i
, 0, &uc
, 0, 0, 0, 1, 1);
143 assert(util_format_is_pure_uint(format
));
144 util_format_write_4ui(format
, arg
.clear_color
.ui
, 0, &uc
, 0, 0, 0, 1, 1);
147 util_fill_box(scene
->cbufs
[i
].map
,
149 scene
->cbufs
[i
].stride
,
150 scene
->cbufs
[i
].layer_stride
,
156 scene
->fb_max_layer
+ 1,
161 uint8_t clear_color
[4];
163 for (i
= 0; i
< 4; ++i
) {
164 clear_color
[i
] = float_to_ubyte(arg
.clear_color
.f
[i
]);
167 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
173 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
174 util_pack_color(arg
.clear_color
.f
,
175 scene
->fb
.cbufs
[i
]->format
, &uc
);
177 util_fill_box(scene
->cbufs
[i
].map
,
178 scene
->fb
.cbufs
[i
]->format
,
179 scene
->cbufs
[i
].stride
,
180 scene
->cbufs
[i
].layer_stride
,
186 scene
->fb_max_layer
+ 1,
192 LP_COUNT(nr_color_tile_clear
);
199 * Clear the rasterizer's current z/stencil tile.
200 * This is a bin command called during bin processing.
201 * Clear commands always clear all bound layers.
204 lp_rast_clear_zstencil(struct lp_rasterizer_task
*task
,
205 const union lp_rast_cmd_arg arg
)
207 const struct lp_scene
*scene
= task
->scene
;
208 uint64_t clear_value64
= arg
.clear_zstencil
.value
;
209 uint64_t clear_mask64
= arg
.clear_zstencil
.mask
;
210 uint32_t clear_value
= (uint32_t) clear_value64
;
211 uint32_t clear_mask
= (uint32_t) clear_mask64
;
212 const unsigned height
= task
->height
;
213 const unsigned width
= task
->width
;
214 const unsigned dst_stride
= scene
->zsbuf
.stride
;
219 LP_DBG(DEBUG_RAST
, "%s: value=0x%08x, mask=0x%08x\n",
220 __FUNCTION__
, clear_value
, clear_mask
);
223 * Clear the area of the depth/depth buffer matching this tile.
226 if (scene
->fb
.zsbuf
) {
228 uint8_t *dst_layer
= lp_rast_get_unswizzled_depth_tile_pointer(task
, LP_TEX_USAGE_READ_WRITE
);
229 block_size
= util_format_get_blocksize(scene
->fb
.zsbuf
->format
);
231 clear_value
&= clear_mask
;
233 for (layer
= 0; layer
<= scene
->fb_max_layer
; layer
++) {
236 switch (block_size
) {
238 assert(clear_mask
== 0xff);
239 memset(dst
, (uint8_t) clear_value
, height
* width
);
242 if (clear_mask
== 0xffff) {
243 for (i
= 0; i
< height
; i
++) {
244 uint16_t *row
= (uint16_t *)dst
;
245 for (j
= 0; j
< width
; j
++)
246 *row
++ = (uint16_t) clear_value
;
251 for (i
= 0; i
< height
; i
++) {
252 uint16_t *row
= (uint16_t *)dst
;
253 for (j
= 0; j
< width
; j
++) {
254 uint16_t tmp
= ~clear_mask
& *row
;
255 *row
++ = clear_value
| tmp
;
262 if (clear_mask
== 0xffffffff) {
263 for (i
= 0; i
< height
; i
++) {
264 uint32_t *row
= (uint32_t *)dst
;
265 for (j
= 0; j
< width
; j
++)
266 *row
++ = clear_value
;
271 for (i
= 0; i
< height
; i
++) {
272 uint32_t *row
= (uint32_t *)dst
;
273 for (j
= 0; j
< width
; j
++) {
274 uint32_t tmp
= ~clear_mask
& *row
;
275 *row
++ = clear_value
| tmp
;
282 clear_value64
&= clear_mask64
;
283 if (clear_mask64
== 0xffffffffffULL
) {
284 for (i
= 0; i
< height
; i
++) {
285 uint64_t *row
= (uint64_t *)dst
;
286 for (j
= 0; j
< width
; j
++)
287 *row
++ = clear_value64
;
292 for (i
= 0; i
< height
; i
++) {
293 uint64_t *row
= (uint64_t *)dst
;
294 for (j
= 0; j
< width
; j
++) {
295 uint64_t tmp
= ~clear_mask64
& *row
;
296 *row
++ = clear_value64
| tmp
;
307 dst_layer
+= scene
->zsbuf
.layer_stride
;
315 * Run the shader on all blocks in a tile. This is used when a tile is
316 * completely contained inside a triangle.
317 * This is a bin command called during bin processing.
320 lp_rast_shade_tile(struct lp_rasterizer_task
*task
,
321 const union lp_rast_cmd_arg arg
)
323 const struct lp_scene
*scene
= task
->scene
;
324 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
325 const struct lp_rast_state
*state
;
326 struct lp_fragment_shader_variant
*variant
;
327 const unsigned tile_x
= task
->x
, tile_y
= task
->y
;
330 if (inputs
->disable
) {
331 /* This command was partially binned and has been disabled */
335 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
342 variant
= state
->variant
;
344 /* render the whole 64x64 tile in 4x4 chunks */
345 for (y
= 0; y
< task
->height
; y
+= 4){
346 for (x
= 0; x
< task
->width
; x
+= 4) {
347 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
348 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
349 uint8_t *depth
= NULL
;
350 unsigned depth_stride
= 0;
354 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++){
355 stride
[i
] = scene
->cbufs
[i
].stride
;
356 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, tile_x
+ x
,
357 tile_y
+ y
, inputs
->layer
);
361 if (scene
->zsbuf
.map
) {
362 depth
= lp_rast_get_unswizzled_depth_block_pointer(task
, tile_x
+ x
,
363 tile_y
+ y
, inputs
->layer
);
364 depth_stride
= scene
->zsbuf
.stride
;
367 /* run shader on 4x4 block */
368 BEGIN_JIT_CALL(state
, task
);
369 variant
->jit_function
[RAST_WHOLE
]( &state
->jit_context
,
370 tile_x
+ x
, tile_y
+ y
,
388 * Run the shader on all blocks in a tile. This is used when a tile is
389 * completely contained inside a triangle, and the shader is opaque.
390 * This is a bin command called during bin processing.
393 lp_rast_shade_tile_opaque(struct lp_rasterizer_task
*task
,
394 const union lp_rast_cmd_arg arg
)
396 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
403 lp_rast_shade_tile(task
, arg
);
408 * Compute shading for a 4x4 block of pixels inside a triangle.
409 * This is a bin command called during bin processing.
410 * \param x X position of quad in window coords
411 * \param y Y position of quad in window coords
414 lp_rast_shade_quads_mask(struct lp_rasterizer_task
*task
,
415 const struct lp_rast_shader_inputs
*inputs
,
416 unsigned x
, unsigned y
,
419 const struct lp_rast_state
*state
= task
->state
;
420 struct lp_fragment_shader_variant
*variant
= state
->variant
;
421 const struct lp_scene
*scene
= task
->scene
;
422 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
423 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
424 uint8_t *depth
= NULL
;
425 unsigned depth_stride
= 0;
431 assert(x
< scene
->tiles_x
* TILE_SIZE
);
432 assert(y
< scene
->tiles_y
* TILE_SIZE
);
433 assert(x
% TILE_VECTOR_WIDTH
== 0);
434 assert(y
% TILE_VECTOR_HEIGHT
== 0);
436 assert((x
% 4) == 0);
437 assert((y
% 4) == 0);
440 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
441 stride
[i
] = scene
->cbufs
[i
].stride
;
442 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, x
, y
, inputs
->layer
);
446 if (scene
->zsbuf
.map
) {
447 depth_stride
= scene
->zsbuf
.stride
;
448 depth
= lp_rast_get_unswizzled_depth_block_pointer(task
, x
, y
, inputs
->layer
);
451 assert(lp_check_alignment(state
->jit_context
.u8_blend_color
, 16));
454 * The rasterizer may produce fragments outside our
455 * allocated 4x4 blocks hence need to filter them out here.
457 if ((x
% TILE_SIZE
) < task
->width
&& (y
% TILE_SIZE
) < task
->height
) {
458 /* run shader on 4x4 block */
459 BEGIN_JIT_CALL(state
, task
);
460 variant
->jit_function
[RAST_EDGE_TEST
](&state
->jit_context
,
479 * Begin a new occlusion query.
480 * This is a bin command put in all bins.
484 lp_rast_begin_query(struct lp_rasterizer_task
*task
,
485 const union lp_rast_cmd_arg arg
)
487 struct llvmpipe_query
*pq
= arg
.query_obj
;
489 assert(task
->query
[pq
->type
] == NULL
);
492 case PIPE_QUERY_OCCLUSION_COUNTER
:
493 task
->thread_data
.vis_counter
= 0;
495 case PIPE_QUERY_PRIMITIVES_GENERATED
:
496 case PIPE_QUERY_PRIMITIVES_EMITTED
:
497 case PIPE_QUERY_SO_STATISTICS
:
498 case PIPE_QUERY_PIPELINE_STATISTICS
:
499 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
506 task
->query
[pq
->type
] = pq
;
511 * End the current occlusion query.
512 * This is a bin command put in all bins.
516 lp_rast_end_query(struct lp_rasterizer_task
*task
,
517 const union lp_rast_cmd_arg arg
)
519 struct llvmpipe_query
*pq
= arg
.query_obj
;
520 assert(task
->query
[pq
->type
] == pq
|| pq
->type
== PIPE_QUERY_TIMESTAMP
);
523 case PIPE_QUERY_OCCLUSION_COUNTER
:
524 pq
->count
[task
->thread_index
] += task
->thread_data
.vis_counter
;
526 case PIPE_QUERY_TIMESTAMP
:
527 pq
->count
[task
->thread_index
] = os_time_get_nano();
529 case PIPE_QUERY_PRIMITIVES_GENERATED
:
530 case PIPE_QUERY_PRIMITIVES_EMITTED
:
531 case PIPE_QUERY_SO_STATISTICS
:
532 case PIPE_QUERY_PIPELINE_STATISTICS
:
533 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
540 if (task
->query
[pq
->type
] == pq
) {
541 task
->query
[pq
->type
] = NULL
;
547 lp_rast_set_state(struct lp_rasterizer_task
*task
,
548 const union lp_rast_cmd_arg arg
)
550 task
->state
= arg
.state
;
556 * Called when we're done writing to a color tile.
559 lp_rast_tile_end(struct lp_rasterizer_task
*task
)
563 for (i
= 0; i
< PIPE_QUERY_TYPES
; ++i
) {
564 if (task
->query
[i
]) {
565 lp_rast_end_query(task
, lp_rast_arg_query(task
->query
[i
]));
570 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
571 task
->depth_tile
= NULL
;
576 static lp_rast_cmd_func dispatch
[LP_RAST_OP_MAX
] =
579 lp_rast_clear_zstencil
,
588 lp_rast_triangle_3_4
,
589 lp_rast_triangle_3_16
,
590 lp_rast_triangle_4_16
,
592 lp_rast_shade_tile_opaque
,
600 do_rasterize_bin(struct lp_rasterizer_task
*task
,
601 const struct cmd_bin
*bin
,
604 const struct cmd_block
*block
;
608 lp_debug_bin(bin
, x
, y
);
610 for (block
= bin
->head
; block
; block
= block
->next
) {
611 for (k
= 0; k
< block
->count
; k
++) {
612 dispatch
[block
->cmd
[k
]]( task
, block
->arg
[k
] );
620 * Rasterize commands for a single bin.
621 * \param x, y position of the bin's tile in the framebuffer
622 * Must be called between lp_rast_begin() and lp_rast_end().
626 rasterize_bin(struct lp_rasterizer_task
*task
,
627 const struct cmd_bin
*bin
, int x
, int y
)
629 lp_rast_tile_begin( task
, bin
, x
, y
);
631 do_rasterize_bin(task
, bin
, x
, y
);
633 lp_rast_tile_end(task
);
638 if (bin
->head
->count
== 1) {
639 if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE_OPAQUE
)
640 LP_COUNT(nr_pure_shade_opaque_64
);
641 else if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE
)
642 LP_COUNT(nr_pure_shade_64
);
647 /* An empty bin is one that just loads the contents of the tile and
648 * stores them again unchanged. This typically happens when bins have
649 * been flushed for some reason in the middle of a frame, or when
650 * incremental updates are being made to a render target.
652 * Try to avoid doing pointless work in this case.
655 is_empty_bin( const struct cmd_bin
*bin
)
657 return bin
->head
== NULL
;
662 * Rasterize/execute all bins within a scene.
666 rasterize_scene(struct lp_rasterizer_task
*task
,
667 struct lp_scene
*scene
)
671 if (!task
->rast
->no_rast
&& !scene
->discard
) {
672 /* loop over scene bins, rasterize each */
678 while ((bin
= lp_scene_bin_iter_next(scene
, &i
, &j
))) {
679 if (!is_empty_bin( bin
))
680 rasterize_bin(task
, bin
, i
, j
);
687 lp_fence_signal(scene
->fence
);
695 * Called by setup module when it has something for us to render.
698 lp_rast_queue_scene( struct lp_rasterizer
*rast
,
699 struct lp_scene
*scene
)
701 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
703 if (rast
->num_threads
== 0) {
706 lp_rast_begin( rast
, scene
);
708 rasterize_scene( &rast
->tasks
[0], scene
);
712 rast
->curr_scene
= NULL
;
715 /* threaded rendering! */
718 lp_scene_enqueue( rast
->full_scenes
, scene
);
720 /* signal the threads that there's work to do */
721 for (i
= 0; i
< rast
->num_threads
; i
++) {
722 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
726 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
731 lp_rast_finish( struct lp_rasterizer
*rast
)
733 if (rast
->num_threads
== 0) {
739 /* wait for work to complete */
740 for (i
= 0; i
< rast
->num_threads
; i
++) {
741 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
748 * This is the thread's main entrypoint.
749 * It's a simple loop:
752 * 3. signal that we're done
754 static PIPE_THREAD_ROUTINE( thread_function
, init_data
)
756 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
757 struct lp_rasterizer
*rast
= task
->rast
;
758 boolean debug
= false;
763 debug_printf("thread %d waiting for work\n", task
->thread_index
);
764 pipe_semaphore_wait(&task
->work_ready
);
769 if (task
->thread_index
== 0) {
771 * - get next scene to rasterize
772 * - map the framebuffer surfaces
775 lp_scene_dequeue( rast
->full_scenes
, TRUE
) );
778 /* Wait for all threads to get here so that threads[1+] don't
779 * get a null rast->curr_scene pointer.
781 pipe_barrier_wait( &rast
->barrier
);
785 debug_printf("thread %d doing work\n", task
->thread_index
);
787 rasterize_scene(task
,
790 /* wait for all threads to finish with this scene */
791 pipe_barrier_wait( &rast
->barrier
);
793 /* XXX: shouldn't be necessary:
795 if (task
->thread_index
== 0) {
799 /* signal done with work */
801 debug_printf("thread %d done working\n", task
->thread_index
);
803 pipe_semaphore_signal(&task
->work_done
);
811 * Initialize semaphores and spawn the threads.
814 create_rast_threads(struct lp_rasterizer
*rast
)
818 /* NOTE: if num_threads is zero, we won't use any threads */
819 for (i
= 0; i
< rast
->num_threads
; i
++) {
820 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
821 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
822 rast
->threads
[i
] = pipe_thread_create(thread_function
,
823 (void *) &rast
->tasks
[i
]);
830 * Create new lp_rasterizer. If num_threads is zero, don't create any
831 * new threads, do rendering synchronously.
832 * \param num_threads number of rasterizer threads to create
834 struct lp_rasterizer
*
835 lp_rast_create( unsigned num_threads
)
837 struct lp_rasterizer
*rast
;
840 rast
= CALLOC_STRUCT(lp_rasterizer
);
845 rast
->full_scenes
= lp_scene_queue_create();
846 if (!rast
->full_scenes
) {
850 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
851 struct lp_rasterizer_task
*task
= &rast
->tasks
[i
];
853 task
->thread_index
= i
;
856 rast
->num_threads
= num_threads
;
858 rast
->no_rast
= debug_get_bool_option("LP_NO_RAST", FALSE
);
860 create_rast_threads(rast
);
862 /* for synchronizing rasterization threads */
863 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
865 memset(lp_dummy_tile
, 0, sizeof lp_dummy_tile
);
878 void lp_rast_destroy( struct lp_rasterizer
*rast
)
882 /* Set exit_flag and signal each thread's work_ready semaphore.
883 * Each thread will be woken up, notice that the exit_flag is set and
884 * break out of its main loop. The thread will then exit.
886 rast
->exit_flag
= TRUE
;
887 for (i
= 0; i
< rast
->num_threads
; i
++) {
888 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
891 /* Wait for threads to terminate before cleaning up per-thread data */
892 for (i
= 0; i
< rast
->num_threads
; i
++) {
893 pipe_thread_wait(rast
->threads
[i
]);
896 /* Clean up per-thread data */
897 for (i
= 0; i
< rast
->num_threads
; i
++) {
898 pipe_semaphore_destroy(&rast
->tasks
[i
].work_ready
);
899 pipe_semaphore_destroy(&rast
->tasks
[i
].work_done
);
902 /* for synchronizing rasterization threads */
903 pipe_barrier_destroy( &rast
->barrier
);
905 lp_scene_queue_destroy(rast
->full_scenes
);
911 /** Return number of rasterization threads */
913 lp_rast_get_num_threads( struct lp_rasterizer
*rast
)
915 return rast
->num_threads
;