1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_string.h"
35 #include "util/u_thread.h"
36 #include "util/u_memset.h"
37 #include "util/os_time.h"
39 #include "lp_scene_queue.h"
40 #include "lp_context.h"
46 #include "lp_rast_priv.h"
47 #include "gallivm/lp_bld_format.h"
48 #include "gallivm/lp_bld_debug.h"
50 #include "lp_tex_sample.h"
/* File-scope pointers to a rasterizer state and a rasterizer task, both
 * initialised to NULL.  Nothing in the visible part of this file assigns
 * them directly.
 * NOTE(review): presumably updated by BEGIN_JIT_CALL() for debugging;
 * confirm against the macro definition.
 */
55 const struct lp_rast_state
*jit_state
= NULL
;
56 const struct lp_rasterizer_task
*jit_task
= NULL
;
59 const float lp_sample_pos_4x
[4][2] = { { 0.375, 0.125 },
65 * Begin rasterizing a scene.
66 * Called once per scene by one thread.
/* Start rasterizing a scene: record `scene` as rast->curr_scene, emit a
 * DEBUG_RAST trace, then call lp_scene_begin_rasterization() and
 * lp_scene_bin_iter_begin() on the scene.
 *
 * \param rast   rasterizer whose curr_scene is set
 * \param scene  scene to be rasterized
 */
69 lp_rast_begin( struct lp_rasterizer
*rast
,
70 struct lp_scene
*scene
)
72 rast
->curr_scene
= scene
;
74 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
76 lp_scene_begin_rasterization( scene
);
77 lp_scene_bin_iter_begin( scene
);
/* Finish rasterizing the current scene: call lp_scene_end_rasterization()
 * on rast->curr_scene and then reset rast->curr_scene to NULL.
 *
 * \param rast  rasterizer whose current scene is being finished
 */
82 lp_rast_end( struct lp_rasterizer
*rast
)
84 lp_scene_end_rasterization( rast
->curr_scene
);
86 rast
->curr_scene
= NULL
;
91 * Beginning rasterization of a tile.
92 * \param x window X position of the tile, in pixels
93 * \param y window Y position of the tile, in pixels
/* Prepare a task for one tile.
 *
 * - task->x / task->y are the tile indices x / y scaled by TILE_SIZE.
 * - task->width / task->height are TILE_SIZE, or the remaining framebuffer
 *   extent when the tile would run past fb.width / fb.height.
 * - The per-thread vis_counter and ps_invocations counters are zeroed.
 * - For every bound color buffer, task->color_tiles[i] is set to
 *   map + stride * task->y + format_bytes * task->x; task->depth_tile is
 *   computed the same way from scene->zsbuf when a zsbuf is bound.
 *
 * \param task  rasterizer task to set up
 * \param bin   command bin for this tile (not referenced in the lines
 *              visible here)
 */
96 lp_rast_tile_begin(struct lp_rasterizer_task
*task
,
97 const struct cmd_bin
*bin
,
101 struct lp_scene
*scene
= task
->scene
;
103 LP_DBG(DEBUG_RAST
, "%s %d,%d\n", __FUNCTION__
, x
, y
);
106 task
->x
= x
* TILE_SIZE
;
107 task
->y
= y
* TILE_SIZE
;
108 task
->width
= TILE_SIZE
+ x
* TILE_SIZE
> task
->scene
->fb
.width
?
109 task
->scene
->fb
.width
- x
* TILE_SIZE
: TILE_SIZE
;
110 task
->height
= TILE_SIZE
+ y
* TILE_SIZE
> task
->scene
->fb
.height
?
111 task
->scene
->fb
.height
- y
* TILE_SIZE
: TILE_SIZE
;
113 task
->thread_data
.vis_counter
= 0;
114 task
->thread_data
.ps_invocations
= 0;
116 for (i
= 0; i
< task
->scene
->fb
.nr_cbufs
; i
++) {
117 if (task
->scene
->fb
.cbufs
[i
]) {
118 task
->color_tiles
[i
] = scene
->cbufs
[i
].map
+
119 scene
->cbufs
[i
].stride
* task
->y
+
120 scene
->cbufs
[i
].format_bytes
* task
->x
;
123 if (task
->scene
->fb
.zsbuf
) {
124 task
->depth_tile
= scene
->zsbuf
.map
+
125 scene
->zsbuf
.stride
* task
->y
+
126 scene
->zsbuf
.format_bytes
* task
->x
;
132 * Clear the rasterizer's current color tile.
133 * This is a bin command called during bin processing.
134 * Clear commands always clear all bound layers.
/* Bin command: clear one color buffer for the current tile.
 *
 * The buffer index comes from arg.clear_rb->cbuf and the clear value from
 * arg.clear_rb->color_val.  The asserts require the index to be below
 * fb.nr_cbufs and the buffer to be bound.  The raw clear value is logged
 * as four dwords, then the clear is repeated for every sample of the
 * buffer, each starting at map + sample_stride * s and covering
 * fb_max_layer + 1 layers.  Finally the nr_color_tile_clear perf counter
 * is bumped.
 *
 * \param task  rasterizer task for the current tile
 * \param arg   command argument; arg.clear_rb is read
 */
137 lp_rast_clear_color(struct lp_rasterizer_task
*task
,
138 const union lp_rast_cmd_arg arg
)
140 const struct lp_scene
*scene
= task
->scene
;
141 unsigned cbuf
= arg
.clear_rb
->cbuf
;
143 enum pipe_format format
;
145 /* we never bin clear commands for non-existing buffers */
146 assert(cbuf
< scene
->fb
.nr_cbufs
);
147 assert(scene
->fb
.cbufs
[cbuf
]);
149 format
= scene
->fb
.cbufs
[cbuf
]->format
;
150 uc
= arg
.clear_rb
->color_val
;
153 * this is pretty rough since we have target format (bunch of bytes...) here.
154 * dump it as raw 4 dwords.
156 LP_DBG(DEBUG_RAST
, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
157 __FUNCTION__
, format
, uc
.ui
[0], uc
.ui
[1], uc
.ui
[2], uc
.ui
[3]);
159 for (unsigned s
= 0; s
< scene
->cbufs
[cbuf
].nr_samples
; s
++) {
160 void *map
= (char *)scene
->cbufs
[cbuf
].map
+ scene
->cbufs
[cbuf
].sample_stride
* s
;
163 scene
->cbufs
[cbuf
].stride
,
164 scene
->cbufs
[cbuf
].layer_stride
,
170 scene
->fb_max_layer
+ 1,
174 /* this will increase for each rb which probably doesn't mean much */
175 LP_COUNT(nr_color_tile_clear
);
180 * Clear the rasterizer's current z/stencil tile.
181 * This is a bin command called during bin processing.
182 * Clear commands always clear all bound layers.
/* Bin command: clear the depth/stencil area covered by the current tile.
 *
 * The 64-bit clear value and mask come from arg.clear_zstencil; 32-bit
 * truncations of both are used for the narrower formats.  When a zsbuf is
 * bound, the clear runs for every sample (starting at
 * task->depth_tile + s * sample_stride) and every layer up to
 * fb_max_layer, advancing by zsbuf.layer_stride per layer.
 *
 * The per-pixel path is selected by the format's block size
 * (util_format_get_blocksize):
 *  - byte-sized rows use memset and assert a full 0xff mask;
 *  - 16-, 32- and 64-bit rows store the value directly when the mask is
 *    full (0xffff, 0xffffffff, 0xffffffffff respectively), otherwise they
 *    merge it with the bits preserved by ~mask.
 *
 * \param task  rasterizer task for the current tile (height/width/depth_tile)
 * \param arg   command argument; arg.clear_zstencil.value and .mask are read
 */
185 lp_rast_clear_zstencil(struct lp_rasterizer_task
*task
,
186 const union lp_rast_cmd_arg arg
)
188 const struct lp_scene
*scene
= task
->scene
;
189 uint64_t clear_value64
= arg
.clear_zstencil
.value
;
190 uint64_t clear_mask64
= arg
.clear_zstencil
.mask
;
191 uint32_t clear_value
= (uint32_t) clear_value64
;
192 uint32_t clear_mask
= (uint32_t) clear_mask64
;
193 const unsigned height
= task
->height
;
194 const unsigned width
= task
->width
;
195 const unsigned dst_stride
= scene
->zsbuf
.stride
;
200 LP_DBG(DEBUG_RAST
, "%s: value=0x%08x, mask=0x%08x\n",
201 __FUNCTION__
, clear_value
, clear_mask
);
204 * Clear the area of the depth/depth buffer matching this tile.
207 if (scene
->fb
.zsbuf
) {
210 for (unsigned s
= 0; s
< scene
->zsbuf
.nr_samples
; s
++) {
211 uint8_t *dst_layer
= task
->depth_tile
+ (s
* scene
->zsbuf
.sample_stride
);
212 block_size
= util_format_get_blocksize(scene
->fb
.zsbuf
->format
);
214 clear_value
&= clear_mask
;
216 for (layer
= 0; layer
<= scene
->fb_max_layer
; layer
++) {
219 switch (block_size
) {
221 assert(clear_mask
== 0xff);
222 for (i
= 0; i
< height
; i
++) {
223 uint8_t *row
= (uint8_t *)dst
;
224 memset(row
, (uint8_t) clear_value
, width
);
229 if (clear_mask
== 0xffff) {
230 for (i
= 0; i
< height
; i
++) {
231 uint16_t *row
= (uint16_t *)dst
;
232 for (j
= 0; j
< width
; j
++)
233 *row
++ = (uint16_t) clear_value
;
238 for (i
= 0; i
< height
; i
++) {
239 uint16_t *row
= (uint16_t *)dst
;
240 for (j
= 0; j
< width
; j
++) {
241 uint16_t tmp
= ~clear_mask
& *row
;
242 *row
++ = clear_value
| tmp
;
249 if (clear_mask
== 0xffffffff) {
250 for (i
= 0; i
< height
; i
++) {
251 util_memset32(dst
, clear_value
, width
);
256 for (i
= 0; i
< height
; i
++) {
257 uint32_t *row
= (uint32_t *)dst
;
258 for (j
= 0; j
< width
; j
++) {
259 uint32_t tmp
= ~clear_mask
& *row
;
260 *row
++ = clear_value
| tmp
;
267 clear_value64
&= clear_mask64
;
268 if (clear_mask64
== 0xffffffffffULL
) {
269 for (i
= 0; i
< height
; i
++) {
270 uint64_t *row
= (uint64_t *)dst
;
271 for (j
= 0; j
< width
; j
++)
272 *row
++ = clear_value64
;
277 for (i
= 0; i
< height
; i
++) {
278 uint64_t *row
= (uint64_t *)dst
;
279 for (j
= 0; j
< width
; j
++) {
280 uint64_t tmp
= ~clear_mask64
& *row
;
281 *row
++ = clear_value64
| tmp
;
292 dst_layer
+= scene
->zsbuf
.layer_stride
;
301 * Run the shader on all blocks in a tile. This is used when a tile is
302 * completely contained inside a triangle.
303 * This is a bin command called during bin processing.
/* Bin command: run the fragment shader over every 4x4 block of the
 * current tile.
 *
 * The shader inputs come from arg.shade_tile; the inputs->disable flag is
 * tested first (the command may have been disabled after partial binning).
 * The loops step x and y by 4 across task->width / task->height.  For each
 * block the function gathers, per bound color buffer, its stride, sample
 * stride and block pointer (lp_rast_get_color_block_pointer), plus the
 * depth block pointer and strides when scene->zsbuf.map is set.  A 64-bit
 * coverage mask is built with 0xffff for each of fb_max_samples samples,
 * the viewport index is copied into the per-thread raster state, and
 * variant->jit_function[RAST_WHOLE] is invoked inside BEGIN_JIT_CALL.
 *
 * \param task  rasterizer task for the current tile
 * \param arg   command argument; arg.shade_tile is read
 */
306 lp_rast_shade_tile(struct lp_rasterizer_task
*task
,
307 const union lp_rast_cmd_arg arg
)
309 const struct lp_scene
*scene
= task
->scene
;
310 const struct lp_rast_shader_inputs
*inputs
= arg
.shade_tile
;
311 const struct lp_rast_state
*state
;
312 struct lp_fragment_shader_variant
*variant
;
313 const unsigned tile_x
= task
->x
, tile_y
= task
->y
;
316 if (inputs
->disable
) {
317 /* This command was partially binned and has been disabled */
321 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
328 variant
= state
->variant
;
330 /* render the whole 64x64 tile in 4x4 chunks */
331 for (y
= 0; y
< task
->height
; y
+= 4){
332 for (x
= 0; x
< task
->width
; x
+= 4) {
333 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
334 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
335 unsigned sample_stride
[PIPE_MAX_COLOR_BUFS
];
336 uint8_t *depth
= NULL
;
337 unsigned depth_stride
= 0;
338 unsigned depth_sample_stride
= 0;
342 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++){
343 if (scene
->fb
.cbufs
[i
]) {
344 stride
[i
] = scene
->cbufs
[i
].stride
;
345 sample_stride
[i
] = scene
->cbufs
[i
].sample_stride
;
346 color
[i
] = lp_rast_get_color_block_pointer(task
, i
, tile_x
+ x
,
347 tile_y
+ y
, inputs
->layer
);
351 sample_stride
[i
] = 0;
357 if (scene
->zsbuf
.map
) {
358 depth
= lp_rast_get_depth_block_pointer(task
, tile_x
+ x
,
359 tile_y
+ y
, inputs
->layer
);
360 depth_stride
= scene
->zsbuf
.stride
;
361 depth_sample_stride
= scene
->zsbuf
.sample_stride
;
365 for (unsigned i
= 0; i
< scene
->fb_max_samples
; i
++)
366 mask
|= (uint64_t)(0xffff) << (16 * i
);
368 /* Propagate non-interpolated raster state. */
369 task
->thread_data
.raster_state
.viewport_index
= inputs
->viewport_index
;
371 /* run shader on 4x4 block */
372 BEGIN_JIT_CALL(state
, task
);
373 variant
->jit_function
[RAST_WHOLE
]( &state
->jit_context
,
374 tile_x
+ x
, tile_y
+ y
,
386 depth_sample_stride
);
394 * Run the shader on all blocks in a tile. This is used when a tile is
395 * completely contained inside a triangle, and the shader is opaque.
396 * This is a bin command called during bin processing.
/* Bin command for shading a whole tile with an opaque shader.  In the
 * lines visible here it logs a DEBUG_RAST trace and forwards both
 * arguments unchanged to lp_rast_shade_tile().
 *
 * \param task  rasterizer task for the current tile
 * \param arg   command argument, passed through to lp_rast_shade_tile()
 */
399 lp_rast_shade_tile_opaque(struct lp_rasterizer_task
*task
,
400 const union lp_rast_cmd_arg arg
)
402 LP_DBG(DEBUG_RAST
, "%s\n", __FUNCTION__
);
409 lp_rast_shade_tile(task
, arg
);
414 * Compute shading for a 4x4 block of pixels inside a triangle.
415 * This is a bin command called during bin processing.
416 * \param x X position of quad in window coords
417 * \param y Y position of quad in window coords
/* Shade one 4x4 block at window position (x, y) using the task's current
 * state (task->state) and its shader variant.
 *
 * The asserts require x / y to lie inside the scene's tile grid, to be
 * multiples of TILE_VECTOR_WIDTH / TILE_VECTOR_HEIGHT and of 4, and the
 * blend color in the JIT context to be 16-byte aligned.  Color strides,
 * sample strides and block pointers are gathered per bound color buffer,
 * and depth pointer/strides when scene->zsbuf.map is set.  The shader
 * (variant->jit_function[RAST_EDGE_TEST]) only runs when the block's
 * offset within the tile is inside task->width x task->height, which
 * filters out fragments outside the allocated blocks.
 *
 * \param task    rasterizer task for the current tile
 * \param inputs  shader inputs; layer and viewport_index are read
 * \param x       X position of the block in window coordinates
 * \param y       Y position of the block in window coordinates
 */
420 lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task
*task
,
421 const struct lp_rast_shader_inputs
*inputs
,
422 unsigned x
, unsigned y
,
425 const struct lp_rast_state
*state
= task
->state
;
426 struct lp_fragment_shader_variant
*variant
= state
->variant
;
427 const struct lp_scene
*scene
= task
->scene
;
428 uint8_t *color
[PIPE_MAX_COLOR_BUFS
];
429 unsigned stride
[PIPE_MAX_COLOR_BUFS
];
430 unsigned sample_stride
[PIPE_MAX_COLOR_BUFS
];
431 uint8_t *depth
= NULL
;
432 unsigned depth_stride
= 0;
433 unsigned depth_sample_stride
= 0;
439 assert(x
< scene
->tiles_x
* TILE_SIZE
);
440 assert(y
< scene
->tiles_y
* TILE_SIZE
);
441 assert(x
% TILE_VECTOR_WIDTH
== 0);
442 assert(y
% TILE_VECTOR_HEIGHT
== 0);
444 assert((x
% 4) == 0);
445 assert((y
% 4) == 0);
448 for (i
= 0; i
< scene
->fb
.nr_cbufs
; i
++) {
449 if (scene
->fb
.cbufs
[i
]) {
450 stride
[i
] = scene
->cbufs
[i
].stride
;
451 sample_stride
[i
] = scene
->cbufs
[i
].sample_stride
;
452 color
[i
] = lp_rast_get_color_block_pointer(task
, i
, x
, y
,
457 sample_stride
[i
] = 0;
463 if (scene
->zsbuf
.map
) {
464 depth_stride
= scene
->zsbuf
.stride
;
465 depth_sample_stride
= scene
->zsbuf
.sample_stride
;
466 depth
= lp_rast_get_depth_block_pointer(task
, x
, y
, inputs
->layer
);
469 assert(lp_check_alignment(state
->jit_context
.u8_blend_color
, 16));
472 * The rasterizer may produce fragments outside our
473 * allocated 4x4 blocks hence need to filter them out here.
475 if ((x
% TILE_SIZE
) < task
->width
&& (y
% TILE_SIZE
) < task
->height
) {
476 /* Propagate non-interpolated raster state. */
477 task
->thread_data
.raster_state
.viewport_index
= inputs
->viewport_index
;
479 /* run shader on 4x4 block */
480 BEGIN_JIT_CALL(state
, task
);
481 variant
->jit_function
[RAST_EDGE_TEST
](&state
->jit_context
,
494 depth_sample_stride
);
/* Shade one 4x4 block using a single coverage mask for all samples.
 *
 * The caller's `mask` is replicated into a 64-bit value, one 16-bit copy
 * per sample (shifted by 16 * i for each of fb_max_samples samples), and
 * the result is handed to lp_rast_shade_quads_mask_sample().
 *
 * \param task    rasterizer task for the current tile
 * \param inputs  shader inputs, passed through
 * \param x       X position of the block, passed through
 * \param y       Y position of the block, passed through
 */
500 lp_rast_shade_quads_mask(struct lp_rasterizer_task
*task
,
501 const struct lp_rast_shader_inputs
*inputs
,
502 unsigned x
, unsigned y
,
505 uint64_t new_mask
= 0;
506 for (unsigned i
= 0; i
< task
->scene
->fb_max_samples
; i
++)
507 new_mask
|= ((uint64_t)mask
) << (16 * i
);
508 lp_rast_shade_quads_mask_sample(task
, inputs
, x
, y
, new_mask
);
512 * Begin a new occlusion query.
513 * This is a bin command put in all bins.
/* Bin command: begin a query for this thread.
 *
 * The query object comes from arg.query_obj.  For the three occlusion
 * query types the thread's current vis_counter is stored in
 * pq->start[thread_index]; for pipeline statistics the current
 * ps_invocations counter is stored there instead.
 *
 * \param task  rasterizer task (thread_index and per-thread counters)
 * \param arg   command argument; arg.query_obj is read
 */
517 lp_rast_begin_query(struct lp_rasterizer_task
*task
,
518 const union lp_rast_cmd_arg arg
)
520 struct llvmpipe_query
*pq
= arg
.query_obj
;
523 case PIPE_QUERY_OCCLUSION_COUNTER
:
524 case PIPE_QUERY_OCCLUSION_PREDICATE
:
525 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
526 pq
->start
[task
->thread_index
] = task
->thread_data
.vis_counter
;
528 case PIPE_QUERY_PIPELINE_STATISTICS
:
529 pq
->start
[task
->thread_index
] = task
->thread_data
.ps_invocations
;
539 * End the current occlusion query.
540 * This is a bin command put in all bins.
/* Bin command: end a query for this thread.
 *
 * The query object comes from arg.query_obj.  For the three occlusion
 * query types, the vis_counter delta since pq->start[thread_index] is
 * added to pq->end[thread_index] and the start slot is zeroed.  Pipeline
 * statistics do the same with ps_invocations.  Timestamp queries store
 * os_time_get_nano() in pq->end[thread_index].
 *
 * \param task  rasterizer task (thread_index and per-thread counters)
 * \param arg   command argument; arg.query_obj is read
 */
544 lp_rast_end_query(struct lp_rasterizer_task
*task
,
545 const union lp_rast_cmd_arg arg
)
547 struct llvmpipe_query
*pq
= arg
.query_obj
;
550 case PIPE_QUERY_OCCLUSION_COUNTER
:
551 case PIPE_QUERY_OCCLUSION_PREDICATE
:
552 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE
:
553 pq
->end
[task
->thread_index
] +=
554 task
->thread_data
.vis_counter
- pq
->start
[task
->thread_index
];
555 pq
->start
[task
->thread_index
] = 0;
557 case PIPE_QUERY_TIMESTAMP
:
558 pq
->end
[task
->thread_index
] = os_time_get_nano();
560 case PIPE_QUERY_PIPELINE_STATISTICS
:
561 pq
->end
[task
->thread_index
] +=
562 task
->thread_data
.ps_invocations
- pq
->start
[task
->thread_index
];
563 pq
->start
[task
->thread_index
] = 0;
/* Bin command: make arg.state the task's current rasterizer state by
 * storing it in task->state (later read by
 * lp_rast_shade_quads_mask_sample()).
 *
 * \param task  rasterizer task to update
 * \param arg   command argument; arg.state is read
 */
573 lp_rast_set_state(struct lp_rasterizer_task
*task
,
574 const union lp_rast_cmd_arg arg
)
576 task
->state
= arg
.state
;
582 * Called when we're done writing to a color tile.
/* Finish work on the current tile: call lp_rast_end_query() for each of
 * the scene's num_active_queries active queries, then zero the task's
 * color_tiles array and reset depth_tile to NULL.
 *
 * \param task  rasterizer task whose tile is finished
 */
585 lp_rast_tile_end(struct lp_rasterizer_task
*task
)
589 for (i
= 0; i
< task
->scene
->num_active_queries
; ++i
) {
590 lp_rast_end_query(task
, lp_rast_arg_query(task
->scene
->active_queries
[i
]));
594 memset(task
->color_tiles
, 0, sizeof(task
->color_tiles
));
595 task
->depth_tile
= NULL
;
/* Table of bin-command handlers with LP_RAST_OP_MAX slots.
 * do_rasterize_bin() indexes it with block->cmd[k], so the order of the
 * entries must match the numeric values of the command opcodes.
 * NOTE(review): several entries are not visible in this view; verify the
 * full ordering against the LP_RAST_OP_* definitions.
 */
600 static lp_rast_cmd_func dispatch
[LP_RAST_OP_MAX
] =
603 lp_rast_clear_zstencil
,
612 lp_rast_triangle_3_4
,
613 lp_rast_triangle_3_16
,
614 lp_rast_triangle_4_16
,
616 lp_rast_shade_tile_opaque
,
620 lp_rast_triangle_32_1
,
621 lp_rast_triangle_32_2
,
622 lp_rast_triangle_32_3
,
623 lp_rast_triangle_32_4
,
624 lp_rast_triangle_32_5
,
625 lp_rast_triangle_32_6
,
626 lp_rast_triangle_32_7
,
627 lp_rast_triangle_32_8
,
628 lp_rast_triangle_32_3_4
,
629 lp_rast_triangle_32_3_16
,
630 lp_rast_triangle_32_4_16
,
631 lp_rast_triangle_ms_1
,
632 lp_rast_triangle_ms_2
,
633 lp_rast_triangle_ms_3
,
634 lp_rast_triangle_ms_4
,
635 lp_rast_triangle_ms_5
,
636 lp_rast_triangle_ms_6
,
637 lp_rast_triangle_ms_7
,
638 lp_rast_triangle_ms_8
,
639 lp_rast_triangle_ms_3_4
,
640 lp_rast_triangle_ms_3_16
,
641 lp_rast_triangle_ms_4_16
,
/* Execute every command stored in a bin.
 *
 * Calls lp_debug_bin() for the bin, then walks the linked list of command
 * blocks starting at bin->head; for each of a block's `count` entries the
 * handler dispatch[block->cmd[k]] is called with the task and
 * block->arg[k].
 *
 * \param task  rasterizer task executing the commands
 * \param bin   command bin to execute
 */
646 do_rasterize_bin(struct lp_rasterizer_task
*task
,
647 const struct cmd_bin
*bin
,
650 const struct cmd_block
*block
;
654 lp_debug_bin(bin
, x
, y
);
656 for (block
= bin
->head
; block
; block
= block
->next
) {
657 for (k
= 0; k
< block
->count
; k
++) {
658 dispatch
[block
->cmd
[k
]]( task
, block
->arg
[k
] );
666 * Rasterize commands for a single bin.
667 * \param x, y position of the bin's tile in the framebuffer
668 * Must be called between lp_rast_begin() and lp_rast_end().
/* Rasterize one bin: lp_rast_tile_begin(), then do_rasterize_bin(), then
 * lp_rast_tile_end().
 *
 * Afterwards, if the bin's first block holds exactly one command, a perf
 * counter is bumped when that command is LP_RAST_OP_SHADE_TILE_OPAQUE
 * (nr_pure_shade_opaque_64) or LP_RAST_OP_SHADE_TILE (nr_pure_shade_64).
 * bin->head is dereferenced without a NULL check here; the caller visible
 * in this file (rasterize_scene) only passes non-empty bins.
 *
 * \param task  rasterizer task executing the bin
 * \param bin   command bin to rasterize
 * \param x     tile X index of the bin
 * \param y     tile Y index of the bin
 */
672 rasterize_bin(struct lp_rasterizer_task
*task
,
673 const struct cmd_bin
*bin
, int x
, int y
)
675 lp_rast_tile_begin( task
, bin
, x
, y
);
677 do_rasterize_bin(task
, bin
, x
, y
);
679 lp_rast_tile_end(task
);
684 if (bin
->head
->count
== 1) {
685 if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE_OPAQUE
)
686 LP_COUNT(nr_pure_shade_opaque_64
);
687 else if (bin
->head
->cmd
[0] == LP_RAST_OP_SHADE_TILE
)
688 LP_COUNT(nr_pure_shade_64
);
694 /* An empty bin is one that just loads the contents of the tile and
695 * stores them again unchanged. This typically happens when bins have
696 * been flushed for some reason in the middle of a frame, or when
697 * incremental updates are being made to a render target.
699 * Try to avoid doing pointless work in this case.
/* Report whether a bin holds no commands, i.e. its head block pointer is
 * NULL.
 *
 * \param bin  command bin to test
 * \return non-zero when bin->head is NULL
 */
702 is_empty_bin( const struct cmd_bin
*bin
)
704 return bin
->head
== NULL
;
709 * Rasterize/execute all bins within a scene.
/* Rasterize every bin of a scene on behalf of one task.
 *
 * With LP_USE_TEXTURE_CACHE the per-thread cache tags are zeroed first,
 * and with LP_BUILD_FORMAT_CACHE_DEBUG the cache access counters are reset
 * and later printed together with a hit rate.  Unless task->rast->no_rast
 * is set, bins are pulled with lp_scene_bin_iter_next() and each non-empty
 * one is passed to rasterize_bin() with its (i, j) tile indices.  The
 * scene's fence is signalled via lp_fence_signal().
 *
 * \param task   rasterizer task doing the work
 * \param scene  scene whose bins are iterated
 */
713 rasterize_scene(struct lp_rasterizer_task
*task
,
714 struct lp_scene
*scene
)
718 /* Clear the cache tags. This should not always be necessary but
720 #if LP_USE_TEXTURE_CACHE
721 memset(task
->thread_data
.cache
->cache_tags
, 0,
722 sizeof(task
->thread_data
.cache
->cache_tags
));
723 #if LP_BUILD_FORMAT_CACHE_DEBUG
724 task
->thread_data
.cache
->cache_access_total
= 0;
725 task
->thread_data
.cache
->cache_access_miss
= 0;
729 if (!task
->rast
->no_rast
) {
730 /* loop over scene bins, rasterize each */
736 while ((bin
= lp_scene_bin_iter_next(scene
, &i
, &j
))) {
737 if (!is_empty_bin( bin
))
738 rasterize_bin(task
, bin
, i
, j
);
744 #if LP_BUILD_FORMAT_CACHE_DEBUG
746 uint64_t total
, miss
;
747 total
= task
->thread_data
.cache
->cache_access_total
;
748 miss
= task
->thread_data
.cache
->cache_access_miss
;
750 debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
751 task
->thread_index
, (long long unsigned)total
,
752 (long long unsigned)miss
,
753 (float)(total
- miss
)/(float)total
);
759 lp_fence_signal(scene
->fence
);
767 * Called by setup module when it has something for us to render.
/* Hand a scene to the rasterizer.
 *
 * With rast->num_threads == 0 the scene is rendered in the calling
 * thread: the FP state is saved, denormals are set to flush to zero,
 * lp_rast_begin() and rasterize_scene() run on tasks[0], the FP state is
 * restored and rast->curr_scene is reset to NULL.  Otherwise the scene is
 * enqueued on rast->full_scenes and every task's work_ready semaphore is
 * signalled.
 *
 * \param rast   rasterizer receiving the scene
 * \param scene  scene to render
 */
770 lp_rast_queue_scene( struct lp_rasterizer
*rast
,
771 struct lp_scene
*scene
)
773 LP_DBG(DEBUG_SETUP
, "%s\n", __FUNCTION__
);
775 if (rast
->num_threads
== 0) {
777 unsigned fpstate
= util_fpstate_get();
779 /* Make sure that denorms are treated like zeros. This is
780 * the behavior required by D3D10. OpenGL doesn't care.
782 util_fpstate_set_denorms_to_zero(fpstate
);
784 lp_rast_begin( rast
, scene
);
786 rasterize_scene( &rast
->tasks
[0], scene
);
790 util_fpstate_set(fpstate
);
792 rast
->curr_scene
= NULL
;
795 /* threaded rendering! */
798 lp_scene_enqueue( rast
->full_scenes
, scene
);
800 /* signal the threads that there's work to do */
801 for (i
= 0; i
< rast
->num_threads
; i
++) {
802 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
806 LP_DBG(DEBUG_SETUP
, "%s done \n", __FUNCTION__
);
/* Wait until rasterization has completed.  With worker threads this
 * blocks on each task's work_done semaphore in turn; the
 * num_threads == 0 case is tested separately.
 *
 * \param rast  rasterizer to wait on
 */
811 lp_rast_finish( struct lp_rasterizer
*rast
)
813 if (rast
->num_threads
== 0) {
819 /* wait for work to complete */
820 for (i
= 0; i
< rast
->num_threads
; i
++) {
821 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
828 * This is the thread's main entrypoint.
829 * It's a simple loop:
832 * 3. signal that we're done
/* Entry point of a rasterizer worker thread.
 *
 * init_data is the thread's lp_rasterizer_task.  The thread names itself
 * "llvmpipe-<thread_index>", switches its FP state to treat denormals as
 * zero, and then waits on its work_ready semaphore.  Thread 0 dequeues
 * the next scene from rast->full_scenes; all threads meet at
 * rast->barrier before calling rasterize_scene() and again afterwards,
 * then signal their work_done semaphore.
 * NOTE(review): the loop and exit-flag handling are not visible in this
 * view; confirm against the full source.
 *
 * \param init_data  pointer to this thread's struct lp_rasterizer_task
 */
835 thread_function(void *init_data
)
837 struct lp_rasterizer_task
*task
= (struct lp_rasterizer_task
*) init_data
;
838 struct lp_rasterizer
*rast
= task
->rast
;
839 boolean debug
= false;
840 char thread_name
[16];
843 snprintf(thread_name
, sizeof thread_name
, "llvmpipe-%u", task
->thread_index
);
844 u_thread_setname(thread_name
);
846 /* Make sure that denorms are treated like zeros. This is
847 * the behavior required by D3D10. OpenGL doesn't care.
849 fpstate
= util_fpstate_get();
850 util_fpstate_set_denorms_to_zero(fpstate
);
855 debug_printf("thread %d waiting for work\n", task
->thread_index
);
856 pipe_semaphore_wait(&task
->work_ready
);
861 if (task
->thread_index
== 0) {
863 * - get next scene to rasterize
864 * - map the framebuffer surfaces
867 lp_scene_dequeue( rast
->full_scenes
, TRUE
) );
870 /* Wait for all threads to get here so that threads[1+] don't
871 * get a null rast->curr_scene pointer.
873 util_barrier_wait( &rast
->barrier
);
877 debug_printf("thread %d doing work\n", task
->thread_index
);
879 rasterize_scene(task
,
882 /* wait for all threads to finish with this scene */
883 util_barrier_wait( &rast
->barrier
);
885 /* XXX: shouldn't be necessary:
887 if (task
->thread_index
== 0) {
891 /* signal done with work */
893 debug_printf("thread %d done working\n", task
->thread_index
);
895 pipe_semaphore_signal(&task
->work_done
);
899 pipe_semaphore_signal(&task
->work_done
);
907 * Initialize semaphores and spawn the threads.
/* Initialise per-task semaphores and spawn the worker threads.
 *
 * For each of rast->num_threads tasks, work_ready and work_done are
 * initialised to 0 and a thread running thread_function() is created with
 * the task as its argument.  If a thread cannot be created,
 * rast->num_threads is lowered to the number of threads created so far.
 *
 * \param rast  rasterizer whose threads are created
 */
910 create_rast_threads(struct lp_rasterizer
*rast
)
914 /* NOTE: if num_threads is zero, we won't use any threads */
915 for (i
= 0; i
< rast
->num_threads
; i
++) {
916 pipe_semaphore_init(&rast
->tasks
[i
].work_ready
, 0);
917 pipe_semaphore_init(&rast
->tasks
[i
].work_done
, 0);
918 rast
->threads
[i
] = u_thread_create(thread_function
,
919 (void *) &rast
->tasks
[i
]);
920 if (!rast
->threads
[i
]) {
921 rast
->num_threads
= i
; /* previous thread is max */
930 * Create new lp_rasterizer. If num_threads is zero, don't create any
931 * new threads, do rendering synchronously.
932 * \param num_threads number of rasterizer threads to create
/* Create a rasterizer.
 *
 * Allocates the lp_rasterizer, its scene queue, and one format cache per
 * task (MAX2(1, num_threads) tasks, so the single-threaded case still has
 * tasks[0]).  Then records num_threads, reads the LP_NO_RAST debug option,
 * spawns the worker threads, initialises the barrier when threads exist
 * and zeroes lp_dummy_tile.
 *
 * On a cache allocation failure control jumps to no_thread_data_cache,
 * which frees the caches allocated so far and destroys the scene queue.
 *
 * \param num_threads  number of rasterizer threads to create; zero means
 *                     rendering is done synchronously
 * \return the new rasterizer
 */
934 struct lp_rasterizer
*
935 lp_rast_create( unsigned num_threads
)
937 struct lp_rasterizer
*rast
;
940 rast
= CALLOC_STRUCT(lp_rasterizer
);
945 rast
->full_scenes
= lp_scene_queue_create();
946 if (!rast
->full_scenes
) {
950 for (i
= 0; i
< MAX2(1, num_threads
); i
++) {
951 struct lp_rasterizer_task
*task
= &rast
->tasks
[i
];
953 task
->thread_index
= i
;
954 task
->thread_data
.cache
= align_malloc(sizeof(struct lp_build_format_cache
),
956 if (!task
->thread_data
.cache
) {
957 goto no_thread_data_cache
;
961 rast
->num_threads
= num_threads
;
963 rast
->no_rast
= debug_get_bool_option("LP_NO_RAST", FALSE
);
965 create_rast_threads(rast
);
967 /* for synchronizing rasterization threads */
968 if (rast
->num_threads
> 0) {
969 util_barrier_init( &rast
->barrier
, rast
->num_threads
);
972 memset(lp_dummy_tile
, 0, sizeof lp_dummy_tile
);
976 no_thread_data_cache
:
/* Bound the cleanup by the num_threads parameter: rast->num_threads has
 * not been assigned yet when this label is reached, so using it here would
 * free only tasks[0]'s cache and leak the rest.  Slots that were never
 * allocated are skipped by the NULL check below.
 */
977 for (i
= 0; i
< MAX2(1, num_threads
); i
++) {
978 if (rast
->tasks
[i
].thread_data
.cache
) {
979 align_free(rast
->tasks
[i
].thread_data
.cache
);
983 lp_scene_queue_destroy(rast
->full_scenes
);
993 void lp_rast_destroy( struct lp_rasterizer
*rast
)
997 /* Set exit_flag and signal each thread's work_ready semaphore.
998 * Each thread will be woken up, notice that the exit_flag is set and
999 * break out of its main loop. The thread will then exit.
1001 rast
->exit_flag
= TRUE
;
1002 for (i
= 0; i
< rast
->num_threads
; i
++) {
1003 pipe_semaphore_signal(&rast
->tasks
[i
].work_ready
);
1006 /* Wait for threads to terminate before cleaning up per-thread data.
1007 * We don't actually call pipe_thread_wait to avoid deadlock on Windows
1008 * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
1009 for (i
= 0; i
< rast
->num_threads
; i
++) {
1011 pipe_semaphore_wait(&rast
->tasks
[i
].work_done
);
1013 thrd_join(rast
->threads
[i
], NULL
);
1017 /* Clean up per-thread data */
1018 for (i
= 0; i
< rast
->num_threads
; i
++) {
1019 pipe_semaphore_destroy(&rast
->tasks
[i
].work_ready
);
1020 pipe_semaphore_destroy(&rast
->tasks
[i
].work_done
);
1022 for (i
= 0; i
< MAX2(1, rast
->num_threads
); i
++) {
1023 align_free(rast
->tasks
[i
].thread_data
.cache
);
1026 /* for synchronizing rasterization threads */
1027 if (rast
->num_threads
> 0) {
1028 util_barrier_destroy( &rast
->barrier
);
1031 lp_scene_queue_destroy(rast
->full_scenes
);