1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
34 #include "lp_scene_queue.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
49 lp_rast_begin( struct lp_rasterizer
*rast
,
50 const struct pipe_framebuffer_state
*fb
,
52 boolean write_zstencil
)
54 struct pipe_screen
*screen
= rast
->screen
;
55 struct pipe_surface
*cbuf
, *zsbuf
;
57 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
59 util_copy_framebuffer_state(&rast
->state
.fb
, fb
);
61 rast
->state
.write_zstencil
= write_zstencil
;
62 rast
->state
.write_color
= write_color
;
64 rast
->check_for_clipped_tiles
= (fb
->width
% TILE_SIZE
!= 0 ||
65 fb
->height
% TILE_SIZE
!= 0);
67 /* XXX support multiple color buffers here */
68 cbuf
= rast
->state
.fb
.cbufs
[0];
70 rast
->cbuf_transfer
= screen
->get_tex_transfer(rast
->screen
,
75 PIPE_TRANSFER_READ_WRITE
,
77 fb
->width
, fb
->height
);
78 if (!rast
->cbuf_transfer
)
81 rast
->cbuf_map
= screen
->transfer_map(rast
->screen
,
87 zsbuf
= rast
->state
.fb
.zsbuf
;
89 rast
->zsbuf_transfer
= screen
->get_tex_transfer(rast
->screen
,
94 PIPE_TRANSFER_READ_WRITE
,
96 fb
->width
, fb
->height
);
97 if (!rast
->zsbuf_transfer
)
100 rast
->zsbuf_map
= screen
->transfer_map(rast
->screen
,
101 rast
->zsbuf_transfer
);
102 if (!rast
->zsbuf_map
)
111 * Finish the rasterization phase.
112 * Unmap framebuffer surfaces.
115 lp_rast_end( struct lp_rasterizer
*rast
)
117 struct pipe_screen
*screen
= rast
->screen
;
120 screen
->transfer_unmap(screen
, rast
->cbuf_transfer
);
123 screen
->transfer_unmap(screen
, rast
->zsbuf_transfer
);
125 if (rast
->cbuf_transfer
)
126 screen
->tex_transfer_destroy(rast
->cbuf_transfer
);
128 if (rast
->zsbuf_transfer
)
129 screen
->tex_transfer_destroy(rast
->zsbuf_transfer
);
131 rast
->cbuf_transfer
= NULL
;
132 rast
->zsbuf_transfer
= NULL
;
133 rast
->cbuf_map
= NULL
;
134 rast
->zsbuf_map
= NULL
;
139 * Begining rasterization of a tile.
140 * \param x window X position of the tile, in pixels
141 * \param y window Y position of the tile, in pixels
144 lp_rast_start_tile( struct lp_rasterizer
*rast
,
145 unsigned thread_index
,
146 unsigned x
, unsigned y
)
148 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, x
, y
);
150 rast
->tasks
[thread_index
].x
= x
;
151 rast
->tasks
[thread_index
].y
= y
;
156 * Clear the rasterizer's current color tile.
157 * This is a bin command called during bin processing.
159 void lp_rast_clear_color( struct lp_rasterizer
*rast
,
160 unsigned thread_index
,
161 const union lp_rast_cmd_arg arg
)
163 const uint8_t *clear_color
= arg
.clear_color
;
164 uint8_t *color_tile
= rast
->tasks
[thread_index
].tile
.color
;
166 LP_DBG(DEBUG_RAST
, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__
,
172 if (clear_color
[0] == clear_color
[1] &&
173 clear_color
[1] == clear_color
[2] &&
174 clear_color
[2] == clear_color
[3]) {
175 memset(color_tile
, clear_color
[0], TILE_SIZE
* TILE_SIZE
* 4);
179 for (y
= 0; y
< TILE_SIZE
; y
++)
180 for (x
= 0; x
< TILE_SIZE
; x
++)
181 for (chan
= 0; chan
< 4; ++chan
)
182 TILE_PIXEL(color_tile
, x
, y
, chan
) = clear_color
[chan
];
188 * Clear the rasterizer's current z/stencil tile.
189 * This is a bin command called during bin processing.
191 void lp_rast_clear_zstencil( struct lp_rasterizer
*rast
,
192 unsigned thread_index
,
193 const union lp_rast_cmd_arg arg
)
196 uint32_t *depth_tile
= rast
->tasks
[thread_index
].tile
.depth
;
198 LP_DBG(DEBUG_RAST
, "%s 0x%x\n", __FUNCTION__
, arg
.clear_zstencil
);
200 for (i
= 0; i
< TILE_SIZE
; i
++)
201 for (j
= 0; j
< TILE_SIZE
; j
++)
202 depth_tile
[i
*TILE_SIZE
+ j
] = arg
.clear_zstencil
;
207 * Load tile color from the framebuffer surface.
208 * This is a bin command called during bin processing.
210 void lp_rast_load_color( struct lp_rasterizer
*rast
,
211 unsigned thread_index
,
212 const union lp_rast_cmd_arg arg
)
214 struct lp_rasterizer_task
*task
= &rast
->tasks
[thread_index
];
215 const unsigned x
= task
->x
;
216 const unsigned y
= task
->y
;
220 LP_DBG(DEBUG_RAST
, "%s at %u, %u\n", __FUNCTION__
, x
, y
);
222 if (x
+ w
> rast
->state
.fb
.width
)
223 w
-= x
+ w
- rast
->state
.fb
.width
;
225 if (y
+ h
> rast
->state
.fb
.height
)
226 h
-= y
+ h
- rast
->state
.fb
.height
;
230 assert(w
<= TILE_SIZE
);
231 assert(h
<= TILE_SIZE
);
233 lp_tile_read_4ub(rast
->cbuf_transfer
->format
,
234 rast
->tasks
[thread_index
].tile
.color
,
236 rast
->cbuf_transfer
->stride
,
243 * Load tile z/stencil from the framebuffer surface.
244 * This is a bin command called during bin processing.
246 void lp_rast_load_zstencil( struct lp_rasterizer
*rast
,
247 unsigned thread_index
,
248 const union lp_rast_cmd_arg arg
)
250 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
252 /* call u_tile func to load depth (and stencil?) from surface */
256 void lp_rast_set_state( struct lp_rasterizer
*rast
,
257 unsigned thread_index
,
258 const union lp_rast_cmd_arg arg
)
260 const struct lp_rast_state
*state
= arg
.set_state
;
262 LP_DBG(DEBUG_RAST
, "%s %p\n", __FUNCTION__
, (void *) state
);
264 /* just set the current state pointer for this rasterizer */
265 rast
->tasks
[thread_index
].current_state
= state
;
274 * Run the shader on all blocks in a tile. This is used when a tile is
275 * completely contained inside a triangle.
276 * This is a bin command called during bin processing.
278 void lp_rast_shade_tile( struct lp_rasterizer
*rast
,
279 unsigned thread_index
,
280 const union lp_rast_cmd_arg arg
)
282 /* Set c1,c2,c3 to large values so the in/out test always passes */
283 const int32_t c1
= INT_MIN
, c2
= INT_MIN
, c3
= INT_MIN
;
284 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
285 const unsigned tile_x
= rast
->tasks
[thread_index
].x
;
286 const unsigned tile_y
= rast
->tasks
[thread_index
].y
;
289 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
291 /* Use the existing preference for 4x4 (four quads) shading:
293 for (y
= 0; y
< TILE_SIZE
; y
+= 4)
294 for (x
= 0; x
< TILE_SIZE
; x
+= 4)
295 lp_rast_shade_quads( rast
,
305 * Compute shading for a 4x4 block of pixels.
306 * This is a bin command called during bin processing.
308 void lp_rast_shade_quads( struct lp_rasterizer
*rast
,
309 unsigned thread_index
,
310 const struct lp_rast_shader_inputs
*inputs
,
311 unsigned x
, unsigned y
,
312 int32_t c1
, int32_t c2
, int32_t c3
)
314 const struct lp_rast_state
*state
= rast
->tasks
[thread_index
].current_state
;
315 struct lp_rast_tile
*tile
= &rast
->tasks
[thread_index
].tile
;
325 assert(x
% TILE_VECTOR_WIDTH
== 0);
326 assert(y
% TILE_VECTOR_HEIGHT
== 0);
328 assert((x
% 4) == 0);
329 assert((y
% 4) == 0);
335 /* offset of the 16x16 pixel block within the tile */
336 block_offset
= ((iy
/4)*(16*16) + (ix
/4)*16);
339 color
= tile
->color
+ 4 * block_offset
;
342 depth
= tile
->depth
+ block_offset
;
345 assert(lp_check_alignment(depth
, 16));
346 assert(lp_check_alignment(color
, 16));
347 assert(lp_check_alignment(state
->jit_context
.blend_color
, 16));
349 assert(lp_check_alignment(inputs
->step
[0], 16));
350 assert(lp_check_alignment(inputs
->step
[1], 16));
351 assert(lp_check_alignment(inputs
->step
[2], 16));
355 state
->jit_function( &state
->jit_context
,
363 inputs
->step
[0], inputs
->step
[1], inputs
->step
[2]
373 * Write the rasterizer's color tile to the framebuffer.
375 static void lp_rast_store_color( struct lp_rasterizer
*rast
,
376 unsigned thread_index
)
378 const unsigned x
= rast
->tasks
[thread_index
].x
;
379 const unsigned y
= rast
->tasks
[thread_index
].y
;
383 if (x
+ w
> rast
->state
.fb
.width
)
384 w
-= x
+ w
- rast
->state
.fb
.width
;
386 if (y
+ h
> rast
->state
.fb
.height
)
387 h
-= y
+ h
- rast
->state
.fb
.height
;
391 assert(w
<= TILE_SIZE
);
392 assert(h
<= TILE_SIZE
);
394 LP_DBG(DEBUG_RAST
, "%s [%u] %d,%d %dx%d\n", __FUNCTION__
,
395 thread_index
, x
, y
, w
, h
);
397 lp_tile_write_4ub(rast
->cbuf_transfer
->format
,
398 rast
->tasks
[thread_index
].tile
.color
,
400 rast
->cbuf_transfer
->stride
,
/**
 * Copy a w x h rectangle of 32-bit depth values into a destination image.
 *
 * \param src         source values, consumed back-to-back: w values per
 *                    row, h rows (no padding between source rows)
 * \param dst         start of the destination image
 * \param dst_stride  destination row pitch, in bytes
 * \param x0, y0      destination position of the rectangle, in pixels
 * \param w, h        rectangle size, in pixels
 *
 * Rows are copied with memcpy() so no uint32_t store is made through a
 * cast byte pointer, and byte offsets are computed in size_t.
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const size_t row_bytes = (size_t) w * sizeof *src;
   uint8_t *dst_row = dst + (size_t) y0 * dst_stride
                          + (size_t) x0 * sizeof *src;
   unsigned y;

   for (y = 0; y < h; ++y) {
      memcpy(dst_row, src, row_bytes);
      src += w;
      dst_row += dst_stride;
   }
}
422 * Write the rasterizer's z/stencil tile to the framebuffer.
424 static void lp_rast_store_zstencil( struct lp_rasterizer
*rast
,
425 unsigned thread_index
)
427 const unsigned x
= rast
->tasks
[thread_index
].x
;
428 const unsigned y
= rast
->tasks
[thread_index
].y
;
429 unsigned w
= TILE_SIZE
;
430 unsigned h
= TILE_SIZE
;
432 if (x
+ w
> rast
->state
.fb
.width
)
433 w
-= x
+ w
- rast
->state
.fb
.width
;
435 if (y
+ h
> rast
->state
.fb
.height
)
436 h
-= y
+ h
- rast
->state
.fb
.height
;
438 LP_DBG(DEBUG_RAST
, "%s %d,%d %dx%d\n", __FUNCTION__
, x
, y
, w
, h
);
440 assert(rast
->zsbuf_transfer
->format
== PIPE_FORMAT_Z32_UNORM
);
441 lp_tile_write_z32(rast
->tasks
[thread_index
].tile
.depth
,
443 rast
->zsbuf_transfer
->stride
,
449 * Write the rasterizer's tiles to the framebuffer.
452 lp_rast_end_tile( struct lp_rasterizer
*rast
,
453 unsigned thread_index
)
455 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
457 if (rast
->state
.write_color
)
458 lp_rast_store_color(rast
, thread_index
);
460 if (rast
->state
.write_zstencil
)
461 lp_rast_store_zstencil(rast
, thread_index
);
466 * Signal on a fence. This is called during bin execution/rasterization.
469 void lp_rast_fence( struct lp_rasterizer
*rast
,
470 unsigned thread_index
,
471 const union lp_rast_cmd_arg arg
)
473 struct lp_fence
*fence
= arg
.fence
;
475 pipe_mutex_lock( fence
->mutex
);
478 assert(fence
->count
<= fence
->rank
);
480 LP_DBG(DEBUG_RAST
, "%s count=%u rank=%u\n", __FUNCTION__
,
481 fence
->count
, fence
->rank
);
483 pipe_condvar_signal( fence
->signalled
);
485 pipe_mutex_unlock( fence
->mutex
);
490 * When all the threads are done rasterizing a scene, one thread will
491 * call this function to reset the scene and put it onto the empty queue.
494 release_scene( struct lp_rasterizer
*rast
,
495 struct lp_scene
*scene
)
497 util_unreference_framebuffer_state( &scene
->fb
);
499 lp_scene_reset( scene
);
500 lp_scene_enqueue( rast
->empty_scenes
, scene
);
501 rast
->curr_scene
= NULL
;
506 * Rasterize commands for a single bin.
507 * \param x, y position of the bin's tile in the framebuffer
508 * Must be called between lp_rast_begin() and lp_rast_end().
512 rasterize_bin( struct lp_rasterizer
*rast
,
513 unsigned thread_index
,
514 const struct cmd_bin
*bin
,
517 const struct cmd_block_list
*commands
= &bin
->commands
;
518 struct cmd_block
*block
;
521 lp_rast_start_tile( rast
, thread_index
, x
, y
);
523 /* simply execute each of the commands in the block list */
524 for (block
= commands
->head
; block
; block
= block
->next
) {
525 for (k
= 0; k
< block
->count
; k
++) {
526 block
->cmd
[k
]( rast
, thread_index
, block
->arg
[k
] );
530 lp_rast_end_tile( rast
, thread_index
);
535 * Rasterize/execute all bins within a scene.
539 rasterize_scene( struct lp_rasterizer
*rast
,
540 unsigned thread_index
,
541 struct lp_scene
*scene
,
544 /* loop over scene bins, rasterize each */
548 for (i
= 0; i
< scene
->tiles_x
; i
++) {
549 for (j
= 0; j
< scene
->tiles_y
; j
++) {
550 struct cmd_bin
*bin
= lp_get_bin(scene
, i
, j
);
551 rasterize_bin( rast
, thread_index
,
552 bin
, i
* TILE_SIZE
, j
* TILE_SIZE
);
562 while ((bin
= lp_scene_bin_iter_next(scene
, &x
, &y
))) {
563 rasterize_bin( rast
, thread_index
, bin
, x
* TILE_SIZE
, y
* TILE_SIZE
);
571 * Called by setup module when it has something for us to render.
574 lp_rasterize_scene( struct lp_rasterizer
*rast
,
575 struct lp_scene
*scene
,
576 const struct pipe_framebuffer_state
*fb
,
579 boolean debug
= false;
581 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
585 printf("rasterize scene:\n");
586 printf(" data size: %u\n", lp_scene_data_size(scene
));
587 for (y
= 0; y
< scene
->tiles_y
; y
++) {
588 for (x
= 0; x
< scene
->tiles_x
; x
++) {
589 printf(" bin %u, %u size: %u\n", x
, y
,
590 lp_scene_bin_size(scene
, x
, y
));
595 /* save framebuffer state in the bin */
596 util_copy_framebuffer_state(&scene
->fb
, fb
);
597 scene
->write_depth
= write_depth
;
599 if (rast
->num_threads
== 0) {
602 lp_rast_begin( rast
, fb
,
604 fb
->zsbuf
!= NULL
&& write_depth
);
606 lp_scene_bin_iter_begin( scene
);
607 rasterize_scene( rast
, 0, scene
, write_depth
);
609 release_scene( rast
, scene
);
614 /* threaded rendering! */
617 lp_scene_enqueue( rast
->full_scenes
, scene
);
619 /* signal the threads that there's work to do */
620 for (i
= 0; i
< rast
->num_threads
; i
++) {
621 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
624 /* wait for work to complete */
625 for (i
= 0; i
< rast
->num_threads
; i
++) {
626 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
630 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
635 * This is the thread's main entrypoint.
636 * It's a simple loop:
639 * 3. signal that we're done
642 thread_func( void *init_data
)
644 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
645 struct lp_rasterizer
*rast
= task
->rast
;
646 boolean debug
= false;
651 debug_printf("thread %d waiting for work\n", task
->thread_index
);
652 pipe_semaphore_wait(&task
->work_ready
);
654 if (task
->thread_index
== 0) {
656 * - get next scene to rasterize
657 * - map the framebuffer surfaces
659 const struct pipe_framebuffer_state
*fb
;
662 rast
->curr_scene
= lp_scene_dequeue( rast
->full_scenes
);
664 lp_scene_bin_iter_begin( rast
->curr_scene
);
666 fb
= &rast
->curr_scene
->fb
;
667 write_depth
= rast
->curr_scene
->write_depth
;
669 lp_rast_begin( rast
, fb
,
670 fb
->cbufs
[0] != NULL
,
671 fb
->zsbuf
!= NULL
&& write_depth
);
674 /* Wait for all threads to get here so that threads[1+] don't
675 * get a null rast->curr_scene pointer.
677 pipe_barrier_wait( &rast
->barrier
);
681 debug_printf("thread %d doing work\n", task
->thread_index
);
682 rasterize_scene(rast
,
685 rast
->curr_scene
->write_depth
);
687 /* wait for all threads to finish with this scene */
688 pipe_barrier_wait( &rast
->barrier
);
690 if (task
->thread_index
== 0) {
692 * - release the scene object
693 * - unmap the framebuffer surfaces
695 release_scene( rast
, rast
->curr_scene
);
699 /* signal done with work */
701 debug_printf("thread %d done working\n", task
->thread_index
);
702 pipe_semaphore_signal(&task
->work_done
);
710 * Initialize semaphores and spawn the threads.
713 create_rast_threads(struct lp_rasterizer
*rast
)
717 rast
->num_threads
= util_cpu_caps
.nr_cpus
;
718 rast
->num_threads
= debug_get_num_option("LP_NUM_THREADS", rast
->num_threads
);
719 rast
->num_threads
= MIN2(rast
->num_threads
, MAX_THREADS
);
721 /* NOTE: if num_threads is zero, we won't use any threads */
722 for (i
= 0; i
< rast
->num_threads
; i
++) {
723 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
724 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
725 rast
->threads
[i
] = pipe_thread_create(thread_func
,
726 (void *) &rast
->tasks
[i
]);
733 * Create new lp_rasterizer.
734 * \param empty the queue to put empty scenes on after we've finished
737 struct lp_rasterizer
*
738 lp_rast_create( struct pipe_screen
*screen
, struct lp_scene_queue
*empty
)
740 struct lp_rasterizer
*rast
;
743 rast
= CALLOC_STRUCT(lp_rasterizer
);
747 rast
->screen
= screen
;
749 rast
->empty_scenes
= empty
;
750 rast
->full_scenes
= lp_scene_queue_create();
752 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
753 rast
->tasks
[i
].tile
.color
= align_malloc( TILE_SIZE
*TILE_SIZE
*4, 16 );
754 rast
->tasks
[i
].tile
.depth
= align_malloc( TILE_SIZE
*TILE_SIZE
*4, 16 );
755 rast
->tasks
[i
].rast
= rast
;
756 rast
->tasks
[i
].thread_index
= i
;
759 create_rast_threads(rast
);
761 /* for synchronizing rasterization threads */
762 pipe_barrier_init( &rast
->barrier
, rast
->num_threads
);
770 void lp_rast_destroy( struct lp_rasterizer
*rast
)
774 util_unreference_framebuffer_state(&rast
->state
.fb
);
776 for (i
= 0; i
< Elements(rast
->tasks
); i
++) {
777 align_free(rast
->tasks
[i
].tile
.depth
);
778 align_free(rast
->tasks
[i
].tile
.color
);
781 /* for synchronizing rasterization threads */
782 pipe_barrier_destroy( &rast
->barrier
);
788 /** Return number of rasterization threads */
790 lp_rast_get_num_threads( struct lp_rasterizer
*rast
)
792 return rast
->num_threads
;