1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
34 #include "lp_scene_queue.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
49 lp_rast_begin( struct lp_rasterizer
*rast
,
50 const struct pipe_framebuffer_state
*fb
,
52 boolean write_zstencil
)
54 struct pipe_screen
*screen
= rast
->screen
;
55 struct pipe_surface
*cbuf
, *zsbuf
;
58 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
60 util_copy_framebuffer_state(&rast
->state
.fb
, fb
);
62 rast
->state
.write_zstencil
= write_zstencil
;
63 rast
->state
.write_color
= write_color
;
65 rast
->check_for_clipped_tiles
= (fb
->width
% TILE_SIZE
!= 0 ||
66 fb
->height
% TILE_SIZE
!= 0);
69 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
70 cbuf
= rast
->state
.fb
.cbufs
[i
];
72 rast
->cbuf_transfer
[i
] = screen
->get_tex_transfer(rast
->screen
,
77 PIPE_TRANSFER_READ_WRITE
,
81 if (!rast
->cbuf_transfer
[i
])
84 rast
->cbuf_map
[i
] = screen
->transfer_map(rast
->screen
,
85 rast
->cbuf_transfer
[i
]);
86 if (!rast
->cbuf_map
[i
])
91 zsbuf
= rast
->state
.fb
.zsbuf
;
93 rast
->zsbuf_transfer
= screen
->get_tex_transfer(rast
->screen
,
98 PIPE_TRANSFER_READ_WRITE
,
102 if (!rast
->zsbuf_transfer
)
105 rast
->zsbuf_map
= screen
->transfer_map(rast
->screen
,
106 rast
->zsbuf_transfer
);
107 if (!rast
->zsbuf_map
)
114 /* Unmap and release transfers?
121 * Finish the rasterization phase.
122 * Unmap framebuffer surfaces.
125 lp_rast_end( struct lp_rasterizer
*rast
)
127 struct pipe_screen
*screen
= rast
->screen
;
130 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
131 if (rast
->cbuf_map
[i
])
132 screen
->transfer_unmap(screen
, rast
->cbuf_transfer
[i
]);
134 if (rast
->cbuf_transfer
[i
])
135 screen
->tex_transfer_destroy(rast
->cbuf_transfer
[i
]);
137 rast
->cbuf_transfer
[i
] = NULL
;
138 rast
->cbuf_map
[i
] = NULL
;
142 screen
->transfer_unmap(screen
, rast
->zsbuf_transfer
);
144 if (rast
->zsbuf_transfer
)
145 screen
->tex_transfer_destroy(rast
->zsbuf_transfer
);
147 rast
->zsbuf_transfer
= NULL
;
148 rast
->zsbuf_map
= NULL
;
153 * Begining rasterization of a tile.
154 * \param x window X position of the tile, in pixels
155 * \param y window Y position of the tile, in pixels
158 lp_rast_start_tile( struct lp_rasterizer
*rast
,
159 unsigned thread_index
,
160 unsigned x
, unsigned y
)
162 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, x
, y
);
164 rast
->tasks
[thread_index
].x
= x
;
165 rast
->tasks
[thread_index
].y
= y
;
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
173 void lp_rast_clear_color( struct lp_rasterizer
*rast
,
174 unsigned thread_index
,
175 const union lp_rast_cmd_arg arg
)
177 const uint8_t *clear_color
= arg
.clear_color
;
178 uint8_t **color_tile
= rast
->tasks
[thread_index
].tile
.color
;
181 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
187 if (clear_color
[0] == clear_color
[1] &&
188 clear_color
[1] == clear_color
[2] &&
189 clear_color
[2] == clear_color
[3]) {
190 /* clear to grayscale value {x, x, x, x} */
191 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
192 memset(color_tile
[i
], clear_color
[0], TILE_SIZE
* TILE_SIZE
* 4);
197 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
198 * will need to change. It'll be pretty obvious when clearing no longer
201 const unsigned chunk
= TILE_SIZE
/ 4;
202 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
203 uint8_t *c
= color_tile
[i
];
205 for (j
= 0; j
< 4 * TILE_SIZE
; j
++) {
206 memset(c
, clear_color
[0], chunk
);
208 memset(c
, clear_color
[1], chunk
);
210 memset(c
, clear_color
[2], chunk
);
212 memset(c
, clear_color
[3], chunk
);
215 assert(c
- color_tile
[i
] == TILE_SIZE
* TILE_SIZE
* 4);
222 * Clear the rasterizer's current z/stencil tile.
223 * This is a bin command called during bin processing.
225 void lp_rast_clear_zstencil( struct lp_rasterizer
*rast
,
226 unsigned thread_index
,
227 const union lp_rast_cmd_arg arg
)
230 uint32_t *depth_tile
= rast
->tasks
[thread_index
].tile
.depth
;
232 LP_DBG(DEBUG_RAST
, "%s 0x%x\n", __FUNCTION__
, arg
.clear_zstencil
);
234 for (i
= 0; i
< TILE_SIZE
* TILE_SIZE
; i
++)
235 depth_tile
[i
] = arg
.clear_zstencil
;
240 * Load tile color from the framebuffer surface.
241 * This is a bin command called during bin processing.
243 void lp_rast_load_color( struct lp_rasterizer
*rast
,
244 unsigned thread_index
,
245 const union lp_rast_cmd_arg arg
)
247 struct lp_rasterizer_task
*task
= &rast
->tasks
[thread_index
];
248 const unsigned x
= task
->x
;
249 const unsigned y
= task
->y
;
252 LP_DBG(DEBUG_RAST
, "%s at %u, %u\n", __FUNCTION__
, x
, y
);
254 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
255 struct pipe_transfer
*transfer
= rast
->cbuf_transfer
[i
];
259 if (x
>= transfer
->width
)
262 if (y
>= transfer
->height
)
264 /* XXX: require tile-size aligned render target dimensions:
266 if (x
+ w
> transfer
->width
)
267 w
-= x
+ w
- transfer
->width
;
269 if (y
+ h
> transfer
->height
)
270 h
-= y
+ h
- transfer
->height
;
274 assert(w
<= TILE_SIZE
);
275 assert(h
<= TILE_SIZE
);
277 lp_tile_read_4ub(transfer
->texture
->format
,
278 rast
->tasks
[thread_index
].tile
.color
[i
],
/**
 * Copy a w x h rectangle of 32-bit Z values out of a mapped surface into
 * a tile buffer.
 *
 * The values are packed into the tile back to back, w values per row.
 * Rows are copied with memcpy() rather than through a uint32_t pointer
 * obtained by casting the byte pointer, so no alignment requirement is
 * placed on the mapping or on its stride.
 *
 * \param tile        destination, at least w * h values
 * \param map         start of the mapped surface
 * \param map_stride  distance between surface rows, in bytes
 * \param x0, y0      position of the rectangle in the surface, in pixels
 * \param w, h        size of the rectangle, in pixels
 */
static void
lp_tile_read_z32(uint32_t *tile,
                 const uint8_t *map,
                 unsigned map_stride,
                 unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const size_t row_bytes = (size_t) w * sizeof(uint32_t);
   const uint8_t *map_row = map + (size_t) y0 * map_stride
                                + (size_t) x0 * sizeof(uint32_t);
   unsigned y;

   for (y = 0; y < h; ++y) {
      memcpy(tile, map_row, row_bytes);
      tile += w;
      map_row += map_stride;
   }
}
305 * Load tile z/stencil from the framebuffer surface.
306 * This is a bin command called during bin processing.
308 void lp_rast_load_zstencil( struct lp_rasterizer
*rast
,
309 unsigned thread_index
,
310 const union lp_rast_cmd_arg arg
)
312 const unsigned x
= rast
->tasks
[thread_index
].x
;
313 const unsigned y
= rast
->tasks
[thread_index
].y
;
314 unsigned w
= TILE_SIZE
;
315 unsigned h
= TILE_SIZE
;
317 if (x
+ w
> rast
->state
.fb
.width
)
318 w
-= x
+ w
- rast
->state
.fb
.width
;
320 if (y
+ h
> rast
->state
.fb
.height
)
321 h
-= y
+ h
- rast
->state
.fb
.height
;
323 LP_DBG(DEBUG_RAST
, "%s %d,%d %dx%d\n", __FUNCTION__
, x
, y
, w
, h
);
325 assert(rast
->zsbuf_transfer
->texture
->format
== PIPE_FORMAT_Z32_UNORM
);
326 lp_tile_read_z32(rast
->tasks
[thread_index
].tile
.depth
,
328 rast
->zsbuf_transfer
->stride
,
333 void lp_rast_set_state( struct lp_rasterizer
*rast
,
334 unsigned thread_index
,
335 const union lp_rast_cmd_arg arg
)
337 const struct lp_rast_state
*state
= arg
.set_state
;
339 LP_DBG(DEBUG_RAST
, "%s %p\n", __FUNCTION__
, (void *) state
);
341 /* just set the current state pointer for this rasterizer */
342 rast
->tasks
[thread_index
].current_state
= state
;
348 * Run the shader on all blocks in a tile. This is used when a tile is
349 * completely contained inside a triangle.
350 * This is a bin command called during bin processing.
352 void lp_rast_shade_tile( struct lp_rasterizer
*rast
,
353 unsigned thread_index
,
354 const union lp_rast_cmd_arg arg
)
356 const struct lp_rast_state
*state
= rast
->tasks
[thread_index
].current_state
;
357 struct lp_rast_tile
*tile
= &rast
->tasks
[thread_index
].tile
;
358 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
359 const unsigned tile_x
= rast
->tasks
[thread_index
].x
;
360 const unsigned tile_y
= rast
->tasks
[thread_index
].y
;
363 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
365 /* render the whole 64x64 tile in 4x4 chunks */
366 for (y
= 0; y
< TILE_SIZE
; y
+= 4){
367 for (x
= 0; x
< TILE_SIZE
; x
+= 4) {
368 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
370 unsigned block_offset
, i
;
372 /* offset of the 16x16 pixel block within the tile */
373 block_offset
= ((y
/ 4) * (16 * 16) + (x
/ 4) * 16);
376 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++)
377 color
[i
] = tile
->color
[i
] + 4 * block_offset
;
380 depth
= tile
->depth
+ block_offset
;
383 state
->jit_function
[0]( &state
->jit_context
,
384 tile_x
+ x
, tile_y
+ y
,
390 INT_MIN
, INT_MIN
, INT_MIN
,
398 * Compute shading for a 4x4 block of pixels.
399 * This is a bin command called during bin processing.
401 void lp_rast_shade_quads( struct lp_rasterizer
*rast
,
402 unsigned thread_index
,
403 const struct lp_rast_shader_inputs
*inputs
,
404 unsigned x
, unsigned y
,
405 int32_t c1
, int32_t c2
, int32_t c3
)
407 const struct lp_rast_state
*state
= rast
->tasks
[thread_index
].current_state
;
408 struct lp_rast_tile
*tile
= &rast
->tasks
[thread_index
].tile
;
409 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
419 assert(x
% TILE_VECTOR_WIDTH
== 0);
420 assert(y
% TILE_VECTOR_HEIGHT
== 0);
422 assert((x
% 4) == 0);
423 assert((y
% 4) == 0);
429 /* offset of the 16x16 pixel block within the tile */
430 block_offset
= ((iy
/ 4) * (16 * 16) + (ix
/ 4) * 16);
433 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++)
434 color
[i
] = tile
->color
[i
] + 4 * block_offset
;
437 depth
= tile
->depth
+ block_offset
;
442 assert(lp_check_alignment(tile
->depth
, 16));
443 assert(lp_check_alignment(tile
->color
[0], 16));
444 assert(lp_check_alignment(state
->jit_context
.blend_color
, 16));
446 assert(lp_check_alignment(inputs
->step
[0], 16));
447 assert(lp_check_alignment(inputs
->step
[1], 16));
448 assert(lp_check_alignment(inputs
->step
[2], 16));
452 state
->jit_function
[1]( &state
->jit_context
,
460 inputs
->step
[0], inputs
->step
[1], inputs
->step
[2]);
467 * Write the rasterizer's color tile to the framebuffer.
469 static void lp_rast_store_color( struct lp_rasterizer
*rast
,
470 unsigned thread_index
)
472 const unsigned x
= rast
->tasks
[thread_index
].x
;
473 const unsigned y
= rast
->tasks
[thread_index
].y
;
476 for (i
= 0; i
< rast
->state
.fb
.nr_cbufs
; i
++) {
477 struct pipe_transfer
*transfer
= rast
->cbuf_transfer
[i
];
481 if (x
>= transfer
->width
)
484 if (y
>= transfer
->height
)
487 /* XXX: require tile-size aligned render target dimensions:
489 if (x
+ w
> transfer
->width
)
490 w
-= x
+ w
- transfer
->width
;
492 if (y
+ h
> transfer
->height
)
493 h
-= y
+ h
- transfer
->height
;
497 assert(w
<= TILE_SIZE
);
498 assert(h
<= TILE_SIZE
);
500 LP_DBG(DEBUG_RAST
, "%s [%u] %d,%d %dx%d\n", __FUNCTION__
,
501 thread_index
, x
, y
, w
, h
);
503 lp_tile_write_4ub(transfer
->texture
->format
,
504 rast
->tasks
[thread_index
].tile
.color
[i
],
/**
 * Copy a w x h rectangle of 32-bit Z values from a tile buffer into a
 * mapped surface.
 *
 * The source values are read back to back, w values per row.  Rows are
 * copied with memcpy() rather than through a uint32_t pointer obtained by
 * casting the byte pointer, so no alignment requirement is placed on the
 * mapping or on its stride.
 *
 * \param src         source, at least w * h values
 * \param dst         start of the mapped surface
 * \param dst_stride  distance between surface rows, in bytes
 * \param x0, y0      position of the rectangle in the surface, in pixels
 * \param w, h        size of the rectangle, in pixels
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const size_t row_bytes = (size_t) w * sizeof(uint32_t);
   uint8_t *dst_row = dst + (size_t) y0 * dst_stride
                          + (size_t) x0 * sizeof(uint32_t);
   unsigned y;

   for (y = 0; y < h; ++y) {
      memcpy(dst_row, src, row_bytes);
      src += w;
      dst_row += dst_stride;
   }
}
529 * Write the rasterizer's z/stencil tile to the framebuffer.
531 static void lp_rast_store_zstencil( struct lp_rasterizer
*rast
,
532 unsigned thread_index
)
534 const unsigned x
= rast
->tasks
[thread_index
].x
;
535 const unsigned y
= rast
->tasks
[thread_index
].y
;
536 unsigned w
= TILE_SIZE
;
537 unsigned h
= TILE_SIZE
;
539 if (x
+ w
> rast
->state
.fb
.width
)
540 w
-= x
+ w
- rast
->state
.fb
.width
;
542 if (y
+ h
> rast
->state
.fb
.height
)
543 h
-= y
+ h
- rast
->state
.fb
.height
;
545 LP_DBG(DEBUG_RAST
, "%s %d,%d %dx%d\n", __FUNCTION__
, x
, y
, w
, h
);
547 assert(rast
->zsbuf_transfer
->texture
->format
== PIPE_FORMAT_Z32_UNORM
);
548 lp_tile_write_z32(rast
->tasks
[thread_index
].tile
.depth
,
550 rast
->zsbuf_transfer
->stride
,
556 * Write the rasterizer's tiles to the framebuffer.
559 lp_rast_end_tile( struct lp_rasterizer
*rast
,
560 unsigned thread_index
)
562 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
564 if (rast
->state
.write_color
)
565 lp_rast_store_color(rast
, thread_index
);
567 if (rast
->state
.write_zstencil
)
568 lp_rast_store_zstencil(rast
, thread_index
);
573 * Signal on a fence. This is called during bin execution/rasterization.
576 void lp_rast_fence( struct lp_rasterizer
*rast
,
577 unsigned thread_index
,
578 const union lp_rast_cmd_arg arg
)
580 struct lp_fence
*fence
= arg
.fence
;
582 pipe_mutex_lock( fence
->mutex
);
585 assert(fence
->count
<= fence
->rank
);
587 LP_DBG(DEBUG_RAST
, "%s count=%u rank=%u\n", __FUNCTION__
,
588 fence
->count
, fence
->rank
);
590 pipe_condvar_signal( fence
->signalled
);
592 pipe_mutex_unlock( fence
->mutex
);
597 * When all the threads are done rasterizing a scene, one thread will
598 * call this function to reset the scene and put it onto the empty queue.
601 release_scene( struct lp_rasterizer
*rast
,
602 struct lp_scene
*scene
)
604 util_unreference_framebuffer_state( &scene
->fb
);
606 lp_scene_reset( scene
);
607 lp_scene_enqueue( rast
->empty_scenes
, scene
);
608 rast
->curr_scene
= NULL
;
613 * Rasterize commands for a single bin.
614 * \param x, y position of the bin's tile in the framebuffer
615 * Must be called between lp_rast_begin() and lp_rast_end().
619 rasterize_bin( struct lp_rasterizer
*rast
,
620 unsigned thread_index
,
621 const struct cmd_bin
*bin
,
624 const struct cmd_block_list
*commands
= &bin
->commands
;
625 struct cmd_block
*block
;
628 lp_rast_start_tile( rast
, thread_index
, x
, y
);
630 /* simply execute each of the commands in the block list */
631 for (block
= commands
->head
; block
; block
= block
->next
) {
632 for (k
= 0; k
< block
->count
; k
++) {
633 block
->cmd
[k
]( rast
, thread_index
, block
->arg
[k
] );
637 lp_rast_end_tile( rast
, thread_index
);
641 #define RAST(x) { lp_rast_##x, #x }
651 RAST(clear_zstencil
),
659 debug_bin( const struct cmd_bin
*bin
)
661 const struct cmd_block
*head
= bin
->commands
.head
;
664 for (i
= 0; i
< head
->count
; i
++) {
665 debug_printf("%d: ", i
);
666 for (j
= 0; j
< Elements(cmd_names
); j
++) {
667 if (head
->cmd
[i
] == cmd_names
[j
].cmd
) {
668 debug_printf("%s\n", cmd_names
[j
].name
);
672 if (j
== Elements(cmd_names
))
673 debug_printf("...other\n");
678 /* An empty bin is one that just loads the contents of the tile and
679 * stores them again unchanged. This typically happens when bins have
680 * been flushed for some reason in the middle of a frame, or when
681 * incremental updates are being made to a render target.
683 * Try to avoid doing pointless work in this case.
686 is_empty_bin( const struct cmd_bin
*bin
)
688 const struct cmd_block
*head
= bin
->commands
.head
;
694 /* We emit at most two load-tile commands at the start of the first
695 * command block. In addition we seem to emit a couple of
696 * set-state commands even in empty bins.
698 * As a heuristic, if a bin has more than 4 commands, consider it
701 if (head
->next
!= NULL
||
706 for (i
= 0; i
< head
->count
; i
++)
707 if (head
->cmd
[i
] != lp_rast_load_color
&&
708 head
->cmd
[i
] != lp_rast_load_zstencil
&&
709 head
->cmd
[i
] != lp_rast_set_state
) {
719 * Rasterize/execute all bins within a scene.
723 rasterize_scene( struct lp_rasterizer
*rast
,
724 unsigned thread_index
,
725 struct lp_scene
*scene
,
728 /* loop over scene bins, rasterize each */
732 for (i
= 0; i
< scene
->tiles_x
; i
++) {
733 for (j
= 0; j
< scene
->tiles_y
; j
++) {
734 struct cmd_bin
*bin
= lp_get_bin(scene
, i
, j
);
735 rasterize_bin( rast
, thread_index
,
736 bin
, i
* TILE_SIZE
, j
* TILE_SIZE
);
746 while ((bin
= lp_scene_bin_iter_next(scene
, &x
, &y
))) {
747 if (!is_empty_bin( bin
))
748 rasterize_bin( rast
, thread_index
, bin
, x
* TILE_SIZE
, y
* TILE_SIZE
);
756 * Called by setup module when it has something for us to render.
759 lp_rasterize_scene( struct lp_rasterizer
*rast
,
760 struct lp_scene
*scene
,
761 const struct pipe_framebuffer_state
*fb
,
764 boolean debug
= false;
766 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
770 printf("rasterize scene:\n");
771 printf(" data size: %u\n", lp_scene_data_size(scene
));
772 for (y
= 0; y
< scene
->tiles_y
; y
++) {
773 for (x
= 0; x
< scene
->tiles_x
; x
++) {
774 printf(" bin %u, %u size: %u\n", x
, y
,
775 lp_scene_bin_size(scene
, x
, y
));
780 /* save framebuffer state in the bin */
781 util_copy_framebuffer_state(&scene
->fb
, fb
);
782 scene
->write_depth
= write_depth
;
784 if (rast
->num_threads
== 0) {
787 lp_rast_begin( rast
, fb
,
788 fb
->nr_cbufs
!= 0, /* always write color if cbufs present */
789 fb
->zsbuf
!= NULL
&& write_depth
);
791 lp_scene_bin_iter_begin( scene
);
792 rasterize_scene( rast
, 0, scene
, write_depth
);
794 release_scene( rast
, scene
);
799 /* threaded rendering! */
802 lp_scene_enqueue( rast
->full_scenes
, scene
);
804 /* signal the threads that there's work to do */
805 for (i
= 0; i
< rast
->num_threads
; i
++) {
806 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
809 /* wait for work to complete */
810 for (i
= 0; i
< rast
->num_threads
; i
++) {
811 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
815 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
820 * This is the thread's main entrypoint.
821 * It's a simple loop:
824 * 3. signal that we're done
827 thread_func( void *init_data
)
829 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
830 struct lp_rasterizer
*rast
= task
->rast
;
831 boolean debug
= false;
836 debug_printf("thread %d waiting for work\n", task
->thread_index
);
837 pipe_semaphore_wait(&task
->work_ready
);
839 if (task
->thread_index
== 0) {
841 * - get next scene to rasterize
842 * - map the framebuffer surfaces
844 const struct pipe_framebuffer_state
*fb
;
847 rast
->curr_scene
= lp_scene_dequeue( rast
->full_scenes
);
849 lp_scene_bin_iter_begin( rast
->curr_scene
);
851 fb
= &rast
->curr_scene
->fb
;
852 write_depth
= rast
->curr_scene
->write_depth
;
854 lp_rast_begin( rast
, fb
,
856 fb
->zsbuf
!= NULL
&& write_depth
);
859 /* Wait for all threads to get here so that threads[1+] don't
860 * get a null rast->curr_scene pointer.
862 pipe_barrier_wait( &rast
->barrier
);
866 debug_printf("thread %d doing work\n", task
->thread_index
);
867 rasterize_scene(rast
,
870 rast
->curr_scene
->write_depth
);
872 /* wait for all threads to finish with this scene */
873 pipe_barrier_wait( &rast
->barrier
);
875 if (task
->thread_index
== 0) {
877 * - release the scene object
878 * - unmap the framebuffer surfaces
880 release_scene( rast
, rast
->curr_scene
);
884 /* signal done with work */
886 debug_printf("thread %d done working\n", task
->thread_index
);
887 pipe_semaphore_signal(&task
->work_done
);
895 * Initialize semaphores and spawn the threads.
898 create_rast_threads(struct lp_rasterizer
*rast
)
902 rast
->num_threads
= util_cpu_caps
.nr_cpus
;
903 rast
->num_threads
= debug_get_num_option("LP_NUM_THREADS", rast
->num_threads
);
904 rast
->num_threads
= MIN2(rast
->num_threads
, MAX_THREADS
);
906 /* NOTE: if num_threads is zero, we won't use any threads */
907 for (i
= 0; i
< rast
->num_threads
; i
++) {
908 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
909 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
910 rast
->threads
[i
] = pipe_thread_create(thread_func
,
911 (void *) &rast
->tasks
[i
]);
918 * Create new lp_rasterizer.
919 * \param empty the queue to put empty scenes on after we've finished
922 struct lp_rasterizer
*
923 lp_rast_create( struct pipe_screen
*screen
, struct lp_scene_queue
*empty
)
925 struct lp_rasterizer
*rast
;
928 rast
= CALLOC_STRUCT(lp_rasterizer
);
932 rast
->screen
= screen
;
934 rast
->empty_scenes
= empty
;
935 rast
->full_scenes
= lp_scene_queue_create();
937 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
938 for (cbuf
= 0; cbuf
< PIPE_MAX_COLOR_BUFS
; cbuf
++ )
939 rast
->tasks
[i
].tile
.color
[cbuf
] = align_malloc( TILE_SIZE
*TILE_SIZE
*4, 16 );
941 rast
->tasks
[i
].tile
.depth
= align_malloc( TILE_SIZE
*TILE_SIZE
*4, 16 );
942 rast
->tasks
[i
].rast
= rast
;
943 rast
->tasks
[i
].thread_index
= i
;
946 create_rast_threads(rast
);
948 /* for synchronizing rasterization threads */
949 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
957 void lp_rast_destroy( struct lp_rasterizer
*rast
)
961 util_unreference_framebuffer_state(&rast
->state
.fb
);
963 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
964 align_free(rast
->tasks
[i
].tile
.depth
);
965 for (cbuf
= 0; cbuf
< PIPE_MAX_COLOR_BUFS
; cbuf
++ )
966 align_free(rast
->tasks
[i
].tile
.color
[cbuf
]);
969 /* for synchronizing rasterization threads */
970 pipe_barrier_destroy( &rast
->barrier
);
976 /** Return number of rasterization threads */
978 lp_rast_get_num_threads( struct lp_rasterizer
*rast
)
980 return rast
->num_threads
;