1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
35 #include "os/os_time.h"
37 #include "lp_scene_queue.h"
43 #include "lp_rast_priv.h"
44 #include "gallivm/lp_bld_debug.h"
46 #include "lp_tex_sample.h"
/* File-scope pointers to a rasterizer state and a rasterizer task, both
 * initialised to NULL.
 * NOTE(review): nothing in the visible code assigns them; presumably the
 * BEGIN_JIT_CALL() macro used further down records the active state/task
 * here for debugging -- confirm against its definition.
 */
51 const struct lp_rast_state
*jit_state
= NULL
;
52 const struct lp_rasterizer_task
*jit_task
= NULL
;
57 * Begin rasterizing a scene.
58 * Called once per scene by one thread.
61 lp_rast_begin( struct lp_rasterizer
*rast
,
62 struct lp_scene
*scene
)
65 rast
->curr_scene
= scene
;
67 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
69 lp_scene_begin_rasterization( scene
);
70 lp_scene_bin_iter_begin( scene
);
75 lp_rast_end( struct lp_rasterizer
*rast
)
77 lp_scene_end_rasterization( rast
->curr_scene
);
79 rast
->curr_scene
= NULL
;
84 * Begin rasterization of a tile.
85 * \param task rasterizer task; its x/y are set to the tile's window position, in pixels
86 * \param bin bin whose x/y (in tile units) select the tile
/* Prepare `task` for the tile described by `bin`.
 *
 * Visible steps:
 *  - task->x / task->y are set to bin->x / bin->y multiplied by TILE_SIZE;
 *  - the task's color_tiles pointer array is zeroed;
 *  - the framebuffer's zsbuf is looked up, a texture usage is chosen
 *    (LP_TEX_USAGE_WRITE_ALL when scene->has_depthstencil_clear is set),
 *    llvmpipe_get_texture_tile() is called with its result discarded, and
 *    task->depth_tile is set from lp_rast_get_depth_block_pointer();
 *  - task->depth_tile is set to NULL on another path.
 *
 * NOTE(review): several lines are missing from this listing (braces, the
 * `else` keywords, any NULL test on zsbuf, and the trailing arguments of
 * both calls), so the exact conditions selecting each path cannot be
 * confirmed from here.
 */
89 lp_rast_tile_begin(struct lp_rasterizer_task
*task
,
90 const struct cmd_bin
*bin
)
92 const struct lp_scene
*scene
= task
->scene
;
93 enum lp_texture_usage usage
;
95 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, bin
->x
, bin
->y
);
98 task
->x
= bin
->x
* TILE_SIZE
;
99 task
->y
= bin
->y
* TILE_SIZE
;
101 /* reset pointers to color tile(s) */
102 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
104 /* get pointer to depth/stencil tile */
106 struct pipe_surface
*zsbuf
= task
->scene
->fb
.zsbuf
;
108 struct llvmpipe_resource
*lpt
= llvmpipe_resource(zsbuf
->texture
);
110 if (scene
->has_depthstencil_clear
)
111 usage
= LP_TEX_USAGE_WRITE_ALL
;
113 usage
= LP_TEX_USAGE_READ_WRITE
;
115 /* "prime" the tile: convert data from linear to tiled if necessary
116 * and update the tile's layout info.
118 (void) llvmpipe_get_texture_tile(lpt
,
119 zsbuf
->u
.tex
.first_layer
,
124 /* Get actual pointer to the tile data. Note that depth/stencil
125 * data is tiled differently than color data.
127 task
->depth_tile
= lp_rast_get_depth_block_pointer(task
,
131 assert(task
->depth_tile
);
134 task
->depth_tile
= NULL
;
141 * Clear the rasterizer's current color tile.
142 * This is a bin command called during bin processing.
/* Bin command: clear the color buffers for the task's current tile.
 *
 * arg.clear_color holds four float channels.  They are converted to bytes
 * with float_to_ubyte() into a local array (the remaining LP_DBG arguments
 * are not visible; presumably these bytes are what gets logged).  Then, for
 * each of scene->fb.nr_cbufs color buffers, the float color is packed for
 * that buffer's format with util_pack_color() and util_fill_rect() is called
 * on the buffer's mapping with its format and stride.
 *
 * NOTE(review): the loop body re-declares `scene`, shadowing the identical
 * outer variable.  The declarations of `i` and `uc`, the rectangle
 * arguments of util_fill_rect() and the closing braces are missing from
 * this listing.
 */
145 lp_rast_clear_color(struct lp_rasterizer_task
*task
,
146 const union lp_rast_cmd_arg arg
)
148 const struct lp_scene
*scene
= task
->scene
;
149 uint8_t clear_color
[4];
153 for (i
= 0; i
< 4; ++i
) {
154 clear_color
[i
] = float_to_ubyte(arg
.clear_color
[i
]);
157 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
163 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
164 const struct lp_scene
*scene
= task
->scene
;
167 util_pack_color(arg
.clear_color
,
168 scene
->fb
.cbufs
[i
]->format
, &uc
);
170 util_fill_rect(scene
->cbufs
[i
].map
,
171 scene
->fb
.cbufs
[i
]->format
,
172 scene
->cbufs
[i
].stride
,
180 LP_COUNT(nr_color_tile_clear
);
189 * Clear the rasterizer's current z/stencil tile.
190 * This is a bin command called during bin processing.
/* Bin command: clear the task's depth/stencil tile (task->depth_tile).
 *
 * arg.clear_zstencil supplies a 32-bit value and mask; the value is
 * pre-masked (clear_value &= clear_mask).  The tile is processed as
 * `height` = TILE_SIZE / TILE_VECTOR_HEIGHT rows of `width` =
 * TILE_SIZE * TILE_VECTOR_HEIGHT elements, and the store width is chosen
 * by switching on scene->zsbuf.blocksize:
 *  - a memset path that asserts the mask is 0xff;
 *  - a 16-bit path and a 32-bit path, each with a fast loop when the mask
 *    covers every bit and a read-modify-write loop that keeps the bits
 *    outside the mask otherwise.
 *
 * NOTE(review): the `case` labels, `break`s, `else` keywords, the
 * declarations of dst/i/j and every use of dst_stride are missing from
 * this listing.  Which blocksize selects which path, and how `dst`
 * advances between rows, therefore cannot be confirmed from here.
 */
193 lp_rast_clear_zstencil(struct lp_rasterizer_task
*task
,
194 const union lp_rast_cmd_arg arg
)
196 const struct lp_scene
*scene
= task
->scene
;
197 uint32_t clear_value
= arg
.clear_zstencil
.value
;
198 uint32_t clear_mask
= arg
.clear_zstencil
.mask
;
199 const unsigned height
= TILE_SIZE
/ TILE_VECTOR_HEIGHT
;
200 const unsigned width
= TILE_SIZE
* TILE_VECTOR_HEIGHT
;
201 const unsigned block_size
= scene
->zsbuf
.blocksize
;
202 const unsigned dst_stride
= scene
->zsbuf
.stride
* TILE_VECTOR_HEIGHT
;
206 LP_DBG(DEBUG_RAST
, "%s: value=0x%08x, mask=0x%08x\n",
207 __FUNCTION__
, clear_value
, clear_mask
);
210 * Clear the area of the swizzled depth/depth buffer matching this tile, in
211 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
213 * The swizzled depth format is such that the depths for
214 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
217 dst
= task
->depth_tile
;
219 clear_value
&= clear_mask
;
221 switch (block_size
) {
223 assert(clear_mask
== 0xff);
224 memset(dst
, (uint8_t) clear_value
, height
* width
);
227 if (clear_mask
== 0xffff) {
228 for (i
= 0; i
< height
; i
++) {
229 uint16_t *row
= (uint16_t *)dst
;
230 for (j
= 0; j
< width
; j
++)
231 *row
++ = (uint16_t) clear_value
;
236 for (i
= 0; i
< height
; i
++) {
237 uint16_t *row
= (uint16_t *)dst
;
238 for (j
= 0; j
< width
; j
++) {
239 uint16_t tmp
= ~clear_mask
& *row
;
240 *row
++ = clear_value
| tmp
;
247 if (clear_mask
== 0xffffffff) {
248 for (i
= 0; i
< height
; i
++) {
249 uint32_t *row
= (uint32_t *)dst
;
250 for (j
= 0; j
< width
; j
++)
251 *row
++ = clear_value
;
256 for (i
= 0; i
< height
; i
++) {
257 uint32_t *row
= (uint32_t *)dst
;
258 for (j
= 0; j
< width
; j
++) {
259 uint32_t tmp
= ~clear_mask
& *row
;
260 *row
++ = clear_value
| tmp
;
275 * Run the shader on all blocks in a tile. This is used when a tile is
276 * completely contained inside a triangle.
277 * This is a bin command called during bin processing.
/* Bin command: run the fragment shader over a whole tile.
 *
 * arg.shade_tile points at the shader inputs.  When inputs->disable is set
 * the command is treated as disabled (the statement inside that branch is
 * not visible; presumably an early return).  Otherwise the tile at
 * (task->x, task->y) is walked in 4x4 steps up to TILE_SIZE in both
 * directions.  For each step the per-color-buffer stride and block pointer
 * and the depth block pointer are gathered, then the variant's RAST_WHOLE
 * JIT function is called, bracketed by BEGIN_JIT_CALL().
 *
 * NOTE(review): the assignment of `state` (only `variant = state->variant`
 * is visible), the declarations of x/y/i/depth, the remaining JIT call
 * arguments and the closing lines are missing from this listing.
 */
280 lp_rast_shade_tile(struct lp_rasterizer_task
*task
,
281 const union lp_rast_cmd_arg arg
)
283 const struct lp_scene
*scene
= task
->scene
;
284 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
285 const struct lp_rast_state
*state
;
286 struct lp_fragment_shader_variant
*variant
;
287 const unsigned tile_x
= task
->x
, tile_y
= task
->y
;
290 if (inputs
->disable
) {
291 /* This command was partially binned and has been disabled */
295 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
302 variant
= state
->variant
;
304 /* render the whole 64x64 tile in 4x4 chunks */
305 for (y
= 0; y
< TILE_SIZE
; y
+= 4){
306 for (x
= 0; x
< TILE_SIZE
; x
+= 4) {
307 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
308 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
313 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++){
314 stride
[i
] = scene
->cbufs
[i
].stride
;
316 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, tile_x
+ x
, tile_y
+ y
);
320 depth
= lp_rast_get_depth_block_pointer(task
, tile_x
+ x
, tile_y
+ y
);
322 /* run shader on 4x4 block */
323 BEGIN_JIT_CALL(state
, task
);
324 variant
->jit_function
[RAST_WHOLE
]( &state
->jit_context
,
325 tile_x
+ x
, tile_y
+ y
,
342 * Run the shader on all blocks in a tile. This is used when a tile is
343 * completely contained inside a triangle, and the shader is opaque.
344 * This is a bin command called during bin processing.
/* Bin command for whole-tile shading with an opaque shader.
 *
 * The visible body logs the function name and forwards `task` and `arg`
 * unchanged to lp_rast_shade_tile().
 *
 * NOTE(review): several lines between the debug message and the forwarded
 * call are missing from this listing, so any checks performed before
 * forwarding cannot be described from here.
 */
347 lp_rast_shade_tile_opaque(struct lp_rasterizer_task
*task
,
348 const union lp_rast_cmd_arg arg
)
350 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
357 lp_rast_shade_tile(task
, arg
);
362 * Compute shading for a 4x4 block of pixels inside a triangle.
363 * This is a bin command called during bin processing.
364 * \param x X position of quad in window coords
365 * \param y Y position of quad in window coords
/* Shade one 4x4 block at window position (x, y) using the task's current
 * state (task->state) and that state's shader variant.
 *
 * The position is asserted to lie inside the scene (less than
 * tiles_x/tiles_y * TILE_SIZE) and to be a multiple of TILE_VECTOR_WIDTH,
 * TILE_VECTOR_HEIGHT and 4.  Per-color-buffer stride and block pointers and
 * the depth block pointer are gathered, the blend color in the JIT context
 * is asserted to be 16-byte aligned, and the variant's RAST_EDGE_TEST JIT
 * function is called, bracketed by BEGIN_JIT_CALL().
 *
 * NOTE(review): the parameter(s) after `y` (presumably a coverage mask,
 * going by the function name), the declarations of i/depth and the
 * remaining JIT call arguments are missing from this listing.
 */
368 lp_rast_shade_quads_mask(struct lp_rasterizer_task
*task
,
369 const struct lp_rast_shader_inputs
*inputs
,
370 unsigned x
, unsigned y
,
373 const struct lp_rast_state
*state
= task
->state
;
374 struct lp_fragment_shader_variant
*variant
= state
->variant
;
375 const struct lp_scene
*scene
= task
->scene
;
376 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
377 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
384 assert(x
< scene
->tiles_x
* TILE_SIZE
);
385 assert(y
< scene
->tiles_y
* TILE_SIZE
);
386 assert(x
% TILE_VECTOR_WIDTH
== 0);
387 assert(y
% TILE_VECTOR_HEIGHT
== 0);
389 assert((x
% 4) == 0);
390 assert((y
% 4) == 0);
393 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
394 stride
[i
] = scene
->cbufs
[i
].stride
;
396 color
[i
] = lp_rast_get_unswizzled_color_block_pointer(task
, i
, x
, y
);
400 depth
= lp_rast_get_depth_block_pointer(task
, x
, y
);
403 assert(lp_check_alignment(state
->jit_context
.u8_blend_color
, 16));
405 /* run shader on 4x4 block */
406 BEGIN_JIT_CALL(state
, task
);
407 variant
->jit_function
[RAST_EDGE_TEST
](&state
->jit_context
,
424 * Begin a new query (e.g. occlusion counter or elapsed time).
425 * This is a bin command put in all bins.
/* Bin command: start the query object passed in arg.query_obj on this task.
 *
 * Asserts that no query of the same type is already recorded in
 * task->query[].  For PIPE_QUERY_OCCLUSION_COUNTER the task's vis_counter
 * is reset to 0; for PIPE_QUERY_TIME_ELAPSED the current os_time_get_nano()
 * value is saved in task->query_start.  Finally the query is recorded in
 * task->query[pq->type].
 *
 * NOTE(review): the `switch` line, `break`s and any `default` branch are
 * missing from this listing.
 */
429 lp_rast_begin_query(struct lp_rasterizer_task
*task
,
430 const union lp_rast_cmd_arg arg
)
432 struct llvmpipe_query
*pq
= arg
.query_obj
;
434 assert(task
->query
[pq
->type
] == NULL
);
437 case PIPE_QUERY_OCCLUSION_COUNTER
:
438 task
->vis_counter
= 0;
440 case PIPE_QUERY_TIME_ELAPSED
:
441 task
->query_start
= os_time_get_nano();
448 task
->query
[pq
->type
] = pq
;
453 * End the current query (occlusion counter, elapsed time or timestamp).
454 * This is a bin command put in all bins.
/* Bin command: finish the query object passed in arg.query_obj.
 *
 * Asserts that the query is the one recorded for its type on this task,
 * except for PIPE_QUERY_TIMESTAMP, which is allowed without a matching
 * begin.  The result goes into the per-thread slot
 * pq->count[task->thread_index]:
 *  - occlusion counter: task->vis_counter is added to the slot;
 *  - time elapsed: the slot is set to os_time_get_nano() - task->query_start;
 *  - timestamp: the slot is set to os_time_get_nano().
 * If the task's slot for this type still holds pq it is cleared.
 *
 * NOTE(review): the `switch` line, `break`s and any `default` branch are
 * missing from this listing.
 */
458 lp_rast_end_query(struct lp_rasterizer_task
*task
,
459 const union lp_rast_cmd_arg arg
)
461 struct llvmpipe_query
*pq
= arg
.query_obj
;
462 assert(task
->query
[pq
->type
] == pq
|| pq
->type
== PIPE_QUERY_TIMESTAMP
);
465 case PIPE_QUERY_OCCLUSION_COUNTER
:
466 pq
->count
[task
->thread_index
] += task
->vis_counter
;
468 case PIPE_QUERY_TIME_ELAPSED
:
469 pq
->count
[task
->thread_index
] = os_time_get_nano() - task
->query_start
;
471 case PIPE_QUERY_TIMESTAMP
:
472 pq
->count
[task
->thread_index
] = os_time_get_nano();
479 if (task
->query
[pq
->type
] == pq
) {
480 task
->query
[pq
->type
] = NULL
;
486 lp_rast_set_state(struct lp_rasterizer_task
*task
,
487 const union lp_rast_cmd_arg arg
)
489 task
->state
= arg
.state
;
495 * Called when we're done writing to a color tile.
/* Finish work on the task's current tile.
 *
 * Every query type slot (0 .. PIPE_QUERY_TYPES-1) that still holds a query
 * is ended by calling lp_rast_end_query() on it.  Afterwards the task's
 * color_tiles pointer array is zeroed and depth_tile is set to NULL.
 *
 * NOTE(review): the declaration of `i` and the brace lines are missing
 * from this listing.
 */
498 lp_rast_tile_end(struct lp_rasterizer_task
*task
)
502 for (i
= 0; i
< PIPE_QUERY_TYPES
; ++i
) {
503 if (task
->query
[i
]) {
504 lp_rast_end_query(task
, lp_rast_arg_query(task
->query
[i
]));
509 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
510 task
->depth_tile
= NULL
;
/* Table of bin command handlers with LP_RAST_OP_MAX slots;
 * do_rasterize_bin() indexes it with each command's opcode.
 *
 * NOTE(review): most initializer entries are missing from this listing, so
 * the opcode-to-handler order cannot be checked from here.
 */
515 static lp_rast_cmd_func dispatch
[LP_RAST_OP_MAX
] =
518 lp_rast_clear_zstencil
,
527 lp_rast_triangle_3_4
,
528 lp_rast_triangle_3_16
,
529 lp_rast_triangle_4_16
,
531 lp_rast_shade_tile_opaque
,
/* Execute every command stored in `bin`.
 *
 * Walks the bin's linked list of command blocks from bin->head via ->next.
 * For each of a block's `count` entries it calls the handler found in
 * dispatch[] under the entry's opcode, passing the task and the entry's
 * argument.  Opcodes are used as table indices without a range check here.
 *
 * NOTE(review): the declaration of `k` and the brace lines are missing
 * from this listing.
 */
539 do_rasterize_bin(struct lp_rasterizer_task
*task
,
540 const struct cmd_bin
*bin
)
542 const struct cmd_block
*block
;
548 for (block
= bin
->head
; block
; block
= block
->next
) {
549 for (k
= 0; k
< block
->count
; k
++) {
550 dispatch
[block
->cmd
[k
]]( task
, block
->arg
[k
] );
558 * Rasterize commands for a single bin.
559 * \param bin the bin to rasterize; it carries its own tile position (bin->x, bin->y)
560 * Must be called between lp_rast_begin() and lp_rast_end().
/* Rasterize one bin: lp_rast_tile_begin(), then do_rasterize_bin(), then
 * lp_rast_tile_end().
 *
 * The trailing statements are statistics only: when the bin's first command
 * block holds exactly one command, LP_COUNT() bumps a counter for a lone
 * LP_RAST_OP_SHADE_TILE_OPAQUE or a lone LP_RAST_OP_SHADE_TILE.
 *
 * NOTE(review): the statistics code dereferences bin->head without a NULL
 * test in the visible lines.  Any preprocessor guard around it is missing
 * from this listing -- confirm that callers never pass an empty bin.
 */
564 rasterize_bin(struct lp_rasterizer_task
*task
,
565 const struct cmd_bin
*bin
)
567 lp_rast_tile_begin( task
, bin
);
569 do_rasterize_bin(task
, bin
);
571 lp_rast_tile_end(task
);
576 if (bin
->head
->count
== 1) {
577 if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE_OPAQUE
)
578 LP_COUNT(nr_pure_shade_opaque_64
);
579 else if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE
)
580 LP_COUNT(nr_pure_shade_64
);
585 /* An empty bin is one that just loads the contents of the tile and
586 * stores them again unchanged. This typically happens when bins have
587 * been flushed for some reason in the middle of a frame, or when
588 * incremental updates are being made to a render target.
590 * Try to avoid doing pointless work in this case.
593 is_empty_bin( const struct cmd_bin
*bin
)
595 return bin
->head
== NULL
;
600 * Rasterize/execute all bins within a scene.
/* Rasterize the bins of `scene` on behalf of `task`.
 *
 * Nothing is rasterized when task->rast->no_rast is set.  Otherwise bins
 * are pulled with lp_scene_bin_iter_next() until it returns NULL, and each
 * bin that is not empty (see is_empty_bin()) is passed to rasterize_bin().
 * lp_fence_signal() is called on scene->fence afterwards.
 *
 * NOTE(review): the visible text also contains a nested loop over
 * tiles_x by tiles_y that calls rasterize_bin() with four arguments.  That
 * does not match the two-parameter definition in this file.  Presumably it
 * is compiled out by preprocessor lines missing from this listing --
 * confirm.  Any guard around the fence signal is likewise not visible.
 */
604 rasterize_scene(struct lp_rasterizer_task
*task
,
605 struct lp_scene
*scene
)
609 if (!task
->rast
->no_rast
) {
610 /* loop over scene bins, rasterize each */
614 for (i
= 0; i
< scene
->tiles_x
; i
++) {
615 for (j
= 0; j
< scene
->tiles_y
; j
++) {
616 struct cmd_bin
*bin
= lp_scene_get_bin(scene
, i
, j
);
617 rasterize_bin(task
, bin
, i
, j
);
626 while ((bin
= lp_scene_bin_iter_next(scene
))) {
627 if (!is_empty_bin( bin
))
628 rasterize_bin(task
, bin
);
636 lp_fence_signal(scene
->fence
);
644 * Called by setup module when it has something for us to render.
/* Hand a scene to the rasterizer.
 *
 * With no rasterizer threads (rast->num_threads == 0) the scene is
 * rasterized in the calling thread: lp_rast_begin(), rasterize_scene() on
 * tasks[0], then curr_scene is cleared.  Otherwise the scene is put on the
 * rast->full_scenes queue and every thread's work_ready semaphore is
 * signalled.
 *
 * NOTE(review): the `else`, the declaration of `i`, the brace lines and
 * any further calls on the non-threaded path (lines between
 * rasterize_scene() and the curr_scene reset) are missing from this
 * listing.
 */
647 lp_rast_queue_scene( struct lp_rasterizer
*rast
,
648 struct lp_scene
*scene
)
650 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
652 if (rast
->num_threads
== 0) {
655 lp_rast_begin( rast
, scene
);
657 rasterize_scene( &rast
->tasks
[0], scene
);
661 rast
->curr_scene
= NULL
;
664 /* threaded rendering! */
667 lp_scene_enqueue( rast
->full_scenes
, scene
);
669 /* signal the threads that there's work to do */
670 for (i
= 0; i
< rast
->num_threads
; i
++) {
671 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
675 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
/* Wait for outstanding rasterization work.
 *
 * In the threaded case this waits once on each thread's work_done
 * semaphore.
 *
 * NOTE(review): the body of the `num_threads == 0` branch, the `else`, the
 * declaration of `i` and the brace lines are missing from this listing.
 */
680 lp_rast_finish( struct lp_rasterizer
*rast
)
682 if (rast
->num_threads
== 0) {
688 /* wait for work to complete */
689 for (i
= 0; i
< rast
->num_threads
; i
++) {
690 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
697 * This is the thread's main entrypoint.
698 * It's a simple loop:
701 * 3. signal that we're done
/* Rasterizer worker thread entry point; init_data is the thread's
 * lp_rasterizer_task.
 *
 * Visible sequence: wait on task->work_ready; the thread whose
 * thread_index is 0 dequeues a scene from rast->full_scenes; all threads
 * wait on rast->barrier; each calls rasterize_scene(); all wait on the
 * barrier again; thread 0 runs an extra step; finally task->work_done is
 * signalled.
 *
 * NOTE(review): the loop construct, the exit test, the call that receives
 * the dequeued scene, the second argument of rasterize_scene() and the body
 * of the second `thread_index == 0` branch are missing from this listing.
 * The debug_printf() calls are presumably guarded by `debug`, but the
 * guards are not visible either.
 */
703 static PIPE_THREAD_ROUTINE( thread_function
, init_data
)
705 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
706 struct lp_rasterizer
*rast
= task
->rast
;
707 boolean debug
= false;
712 debug_printf("thread %d waiting for work\n", task
->thread_index
);
713 pipe_semaphore_wait(&task
->work_ready
);
718 if (task
->thread_index
== 0) {
720 * - get next scene to rasterize
721 * - map the framebuffer surfaces
724 lp_scene_dequeue( rast
->full_scenes
, TRUE
) );
727 /* Wait for all threads to get here so that threads[1+] don't
728 * get a null rast->curr_scene pointer.
730 pipe_barrier_wait( &rast
->barrier
);
734 debug_printf("thread %d doing work\n", task
->thread_index
);
736 rasterize_scene(task
,
739 /* wait for all threads to finish with this scene */
740 pipe_barrier_wait( &rast
->barrier
);
742 /* XXX: shouldn't be necessary:
744 if (task
->thread_index
== 0) {
748 /* signal done with work */
750 debug_printf("thread %d done working\n", task
->thread_index
);
752 pipe_semaphore_signal(&task
->work_done
);
760 * Initialize semaphores and spawn the threads.
/* Set up and start rast->num_threads worker threads.
 *
 * For each thread index the task's work_ready and work_done semaphores are
 * initialised to 0, then pipe_thread_create() starts thread_function with
 * a pointer to that task as its argument.  The handle is kept in
 * rast->threads[i].  With num_threads == 0 the loop does nothing.
 *
 * NOTE(review): the declaration of `i` and the brace lines are missing
 * from this listing.
 */
763 create_rast_threads(struct lp_rasterizer
*rast
)
767 /* NOTE: if num_threads is zero, we won't use any threads */
768 for (i
= 0; i
< rast
->num_threads
; i
++) {
769 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
770 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
771 rast
->threads
[i
] = pipe_thread_create(thread_function
,
772 (void *) &rast
->tasks
[i
]);
779 * Create new lp_rasterizer. If num_threads is zero, don't create any
780 * new threads, do rendering synchronously.
781 * \param num_threads number of rasterizer threads to create
/* Allocate and initialise a rasterizer.
 *
 * Visible steps: allocate the struct with CALLOC_STRUCT(); create the
 * full_scenes queue; give every entry of rast->tasks its index as
 * thread_index; record num_threads; read the LP_NO_RAST debug option into
 * no_rast; start the worker threads; initialise the barrier for
 * num_threads participants; zero lp_swizzled_cbuf and lp_dummy_tile.
 *
 * NOTE(review): the allocation-failure handling, the body of the
 * `!rast->full_scenes` branch, any other per-task initialisation and the
 * return statement are missing from this listing.
 */
783 struct lp_rasterizer
*
784 lp_rast_create( unsigned num_threads
)
786 struct lp_rasterizer
*rast
;
789 rast
= CALLOC_STRUCT(lp_rasterizer
);
794 rast
->full_scenes
= lp_scene_queue_create();
795 if (!rast
->full_scenes
) {
799 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
800 struct lp_rasterizer_task
*task
= &rast
->tasks
[i
];
802 task
->thread_index
= i
;
805 rast
->num_threads
= num_threads
;
807 rast
->no_rast
= debug_get_bool_option("LP_NO_RAST", FALSE
);
809 create_rast_threads(rast
);
811 /* for synchronizing rasterization threads */
812 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
814 memset(lp_swizzled_cbuf
, 0, sizeof lp_swizzled_cbuf
);
816 memset(lp_dummy_tile
, 0, sizeof lp_dummy_tile
);
/* Shut down the rasterizer's worker threads and release its
 * synchronisation objects.
 *
 * Order of operations: set rast->exit_flag; signal every thread's
 * work_ready semaphore; wait for every thread with pipe_thread_wait();
 * destroy each task's two semaphores; destroy the barrier; destroy the
 * full_scenes queue.
 *
 * NOTE(review): the declaration of `i`, the brace lines and whatever
 * follows the queue destruction (e.g. freeing `rast` itself) are missing
 * from this listing.
 */
829 void lp_rast_destroy( struct lp_rasterizer
*rast
)
833 /* Set exit_flag and signal each thread's work_ready semaphore.
834 * Each thread will be woken up, notice that the exit_flag is set and
835 * break out of its main loop. The thread will then exit.
837 rast
->exit_flag
= TRUE
;
838 for (i
= 0; i
< rast
->num_threads
; i
++) {
839 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
842 /* Wait for threads to terminate before cleaning up per-thread data */
843 for (i
= 0; i
< rast
->num_threads
; i
++) {
844 pipe_thread_wait(rast
->threads
[i
]);
847 /* Clean up per-thread data */
848 for (i
= 0; i
< rast
->num_threads
; i
++) {
849 pipe_semaphore_destroy(&rast
->tasks
[i
].work_ready
);
850 pipe_semaphore_destroy(&rast
->tasks
[i
].work_done
);
853 /* for synchronizing rasterization threads */
854 pipe_barrier_destroy( &rast
->barrier
);
856 lp_scene_queue_destroy(rast
->full_scenes
);
862 /** Return number of rasterization threads */
864 lp_rast_get_num_threads( struct lp_rasterizer
*rast
)
866 return rast
->num_threads
;