79d4c588d12ba5e3aa48aac57a18cbf57714e491
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
34
35 #include "os/os_time.h"
36
37 #include "lp_scene_queue.h"
38 #include "lp_debug.h"
39 #include "lp_fence.h"
40 #include "lp_perf.h"
41 #include "lp_query.h"
42 #include "lp_rast.h"
43 #include "lp_rast_priv.h"
44 #include "gallivm/lp_bld_debug.h"
45 #include "lp_scene.h"
46 #include "lp_tex_sample.h"
47
48
#ifdef DEBUG
/*
 * Debug-build-only globals describing a JIT shader call.
 * NOTE(review): presumably updated by the BEGIN_JIT_CALL()/END_JIT_CALL()
 * macros, which are defined outside this file -- confirm.
 */
const struct lp_rasterizer_task *jit_task = NULL;
const struct lp_rast_state *jit_state = NULL;
int jit_line = 0;
#endif
54
55
56 /**
57 * Begin rasterizing a scene.
58 * Called once per scene by one thread.
59 */
60 static void
61 lp_rast_begin( struct lp_rasterizer *rast,
62 struct lp_scene *scene )
63 {
64
65 rast->curr_scene = scene;
66
67 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
68
69 lp_scene_begin_rasterization( scene );
70 lp_scene_bin_iter_begin( scene );
71 }
72
73
74 static void
75 lp_rast_end( struct lp_rasterizer *rast )
76 {
77 lp_scene_end_rasterization( rast->curr_scene );
78
79 rast->curr_scene = NULL;
80 }
81
82
83 /**
84 * Begining rasterization of a tile.
85 * \param x window X position of the tile, in pixels
86 * \param y window Y position of the tile, in pixels
87 */
88 static void
89 lp_rast_tile_begin(struct lp_rasterizer_task *task,
90 const struct cmd_bin *bin,
91 int x, int y)
92 {
93 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
94
95 task->bin = bin;
96 task->x = x * TILE_SIZE;
97 task->y = y * TILE_SIZE;
98 task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
99 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
100 task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
101 task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
102
103 /* reset pointers to color and depth tile(s) */
104 memset(task->color_tiles, 0, sizeof(task->color_tiles));
105 task->depth_tile = NULL;
106 }
107
108
109 /**
110 * Clear the rasterizer's current color tile.
111 * This is a bin command called during bin processing.
112 * Clear commands always clear all bound layers.
113 */
114 static void
115 lp_rast_clear_color(struct lp_rasterizer_task *task,
116 const union lp_rast_cmd_arg arg)
117 {
118 const struct lp_scene *scene = task->scene;
119
120 if (scene->fb.nr_cbufs) {
121 unsigned i;
122 union util_color uc;
123
124 if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) {
125 /*
126 * We expect int/uint clear values here, though some APIs
127 * might disagree (but in any case util_pack_color()
128 * couldn't handle it)...
129 */
130 LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
131 arg.clear_color.ui[0],
132 arg.clear_color.ui[1],
133 arg.clear_color.ui[2],
134 arg.clear_color.ui[3]);
135
136 for (i = 0; i < scene->fb.nr_cbufs; i++) {
137 enum pipe_format format = scene->fb.cbufs[i]->format;
138
139 if (util_format_is_pure_sint(format)) {
140 util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
141 }
142 else {
143 assert(util_format_is_pure_uint(format));
144 util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
145 }
146
147 util_fill_box(scene->cbufs[i].map,
148 format,
149 scene->cbufs[i].stride,
150 scene->cbufs[i].layer_stride,
151 task->x,
152 task->y,
153 0,
154 task->width,
155 task->height,
156 scene->fb_max_layer + 1,
157 &uc);
158 }
159 }
160 else {
161 uint8_t clear_color[4];
162
163 for (i = 0; i < 4; ++i) {
164 clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
165 }
166
167 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
168 clear_color[0],
169 clear_color[1],
170 clear_color[2],
171 clear_color[3]);
172
173 for (i = 0; i < scene->fb.nr_cbufs; i++) {
174 util_pack_color(arg.clear_color.f,
175 scene->fb.cbufs[i]->format, &uc);
176
177 util_fill_box(scene->cbufs[i].map,
178 scene->fb.cbufs[i]->format,
179 scene->cbufs[i].stride,
180 scene->cbufs[i].layer_stride,
181 task->x,
182 task->y,
183 0,
184 task->width,
185 task->height,
186 scene->fb_max_layer + 1,
187 &uc);
188 }
189 }
190 }
191
192 LP_COUNT(nr_color_tile_clear);
193 }
194
195
196
197
198 /**
199 * Clear the rasterizer's current z/stencil tile.
200 * This is a bin command called during bin processing.
201 * Clear commands always clear all bound layers.
202 */
203 static void
204 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
205 const union lp_rast_cmd_arg arg)
206 {
207 const struct lp_scene *scene = task->scene;
208 uint64_t clear_value64 = arg.clear_zstencil.value;
209 uint64_t clear_mask64 = arg.clear_zstencil.mask;
210 uint32_t clear_value = (uint32_t) clear_value64;
211 uint32_t clear_mask = (uint32_t) clear_mask64;
212 const unsigned height = task->height;
213 const unsigned width = task->width;
214 const unsigned dst_stride = scene->zsbuf.stride;
215 uint8_t *dst;
216 unsigned i, j;
217 unsigned block_size;
218
219 LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
220 __FUNCTION__, clear_value, clear_mask);
221
222 /*
223 * Clear the area of the depth/depth buffer matching this tile.
224 */
225
226 if (scene->fb.zsbuf) {
227 unsigned layer;
228 uint8_t *dst_layer = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
229 block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
230
231 clear_value &= clear_mask;
232
233 for (layer = 0; layer <= scene->fb_max_layer; layer++) {
234 dst = dst_layer;
235
236 switch (block_size) {
237 case 1:
238 assert(clear_mask == 0xff);
239 memset(dst, (uint8_t) clear_value, height * width);
240 break;
241 case 2:
242 if (clear_mask == 0xffff) {
243 for (i = 0; i < height; i++) {
244 uint16_t *row = (uint16_t *)dst;
245 for (j = 0; j < width; j++)
246 *row++ = (uint16_t) clear_value;
247 dst += dst_stride;
248 }
249 }
250 else {
251 for (i = 0; i < height; i++) {
252 uint16_t *row = (uint16_t *)dst;
253 for (j = 0; j < width; j++) {
254 uint16_t tmp = ~clear_mask & *row;
255 *row++ = clear_value | tmp;
256 }
257 dst += dst_stride;
258 }
259 }
260 break;
261 case 4:
262 if (clear_mask == 0xffffffff) {
263 for (i = 0; i < height; i++) {
264 uint32_t *row = (uint32_t *)dst;
265 for (j = 0; j < width; j++)
266 *row++ = clear_value;
267 dst += dst_stride;
268 }
269 }
270 else {
271 for (i = 0; i < height; i++) {
272 uint32_t *row = (uint32_t *)dst;
273 for (j = 0; j < width; j++) {
274 uint32_t tmp = ~clear_mask & *row;
275 *row++ = clear_value | tmp;
276 }
277 dst += dst_stride;
278 }
279 }
280 break;
281 case 8:
282 clear_value64 &= clear_mask64;
283 if (clear_mask64 == 0xffffffffffULL) {
284 for (i = 0; i < height; i++) {
285 uint64_t *row = (uint64_t *)dst;
286 for (j = 0; j < width; j++)
287 *row++ = clear_value64;
288 dst += dst_stride;
289 }
290 }
291 else {
292 for (i = 0; i < height; i++) {
293 uint64_t *row = (uint64_t *)dst;
294 for (j = 0; j < width; j++) {
295 uint64_t tmp = ~clear_mask64 & *row;
296 *row++ = clear_value64 | tmp;
297 }
298 dst += dst_stride;
299 }
300 }
301 break;
302
303 default:
304 assert(0);
305 break;
306 }
307 dst_layer += scene->zsbuf.layer_stride;
308 }
309 }
310 }
311
312
313
314 /**
315 * Run the shader on all blocks in a tile. This is used when a tile is
316 * completely contained inside a triangle.
317 * This is a bin command called during bin processing.
318 */
319 static void
320 lp_rast_shade_tile(struct lp_rasterizer_task *task,
321 const union lp_rast_cmd_arg arg)
322 {
323 const struct lp_scene *scene = task->scene;
324 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
325 const struct lp_rast_state *state;
326 struct lp_fragment_shader_variant *variant;
327 const unsigned tile_x = task->x, tile_y = task->y;
328 unsigned x, y;
329
330 if (inputs->disable) {
331 /* This command was partially binned and has been disabled */
332 return;
333 }
334
335 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
336
337 state = task->state;
338 assert(state);
339 if (!state) {
340 return;
341 }
342 variant = state->variant;
343
344 /* render the whole 64x64 tile in 4x4 chunks */
345 for (y = 0; y < task->height; y += 4){
346 for (x = 0; x < task->width; x += 4) {
347 uint8_t *color[PIPE_MAX_COLOR_BUFS];
348 unsigned stride[PIPE_MAX_COLOR_BUFS];
349 uint8_t *depth = NULL;
350 unsigned depth_stride = 0;
351 unsigned i;
352
353 /* color buffer */
354 for (i = 0; i < scene->fb.nr_cbufs; i++){
355 stride[i] = scene->cbufs[i].stride;
356 color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x,
357 tile_y + y, inputs->layer);
358 }
359
360 /* depth buffer */
361 if (scene->zsbuf.map) {
362 depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x,
363 tile_y + y, inputs->layer);
364 depth_stride = scene->zsbuf.stride;
365 }
366
367 /* run shader on 4x4 block */
368 BEGIN_JIT_CALL(state, task);
369 variant->jit_function[RAST_WHOLE]( &state->jit_context,
370 tile_x + x, tile_y + y,
371 inputs->frontfacing,
372 GET_A0(inputs),
373 GET_DADX(inputs),
374 GET_DADY(inputs),
375 color,
376 depth,
377 0xffff,
378 &task->thread_data,
379 stride,
380 depth_stride);
381 END_JIT_CALL();
382 }
383 }
384 }
385
386
387 /**
388 * Run the shader on all blocks in a tile. This is used when a tile is
389 * completely contained inside a triangle, and the shader is opaque.
390 * This is a bin command called during bin processing.
391 */
392 static void
393 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
394 const union lp_rast_cmd_arg arg)
395 {
396 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
397
398 assert(task->state);
399 if (!task->state) {
400 return;
401 }
402
403 lp_rast_shade_tile(task, arg);
404 }
405
406
407 /**
408 * Compute shading for a 4x4 block of pixels inside a triangle.
409 * This is a bin command called during bin processing.
410 * \param x X position of quad in window coords
411 * \param y Y position of quad in window coords
412 */
413 void
414 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
415 const struct lp_rast_shader_inputs *inputs,
416 unsigned x, unsigned y,
417 unsigned mask)
418 {
419 const struct lp_rast_state *state = task->state;
420 struct lp_fragment_shader_variant *variant = state->variant;
421 const struct lp_scene *scene = task->scene;
422 uint8_t *color[PIPE_MAX_COLOR_BUFS];
423 unsigned stride[PIPE_MAX_COLOR_BUFS];
424 uint8_t *depth = NULL;
425 unsigned depth_stride = 0;
426 unsigned i;
427
428 assert(state);
429
430 /* Sanity checks */
431 assert(x < scene->tiles_x * TILE_SIZE);
432 assert(y < scene->tiles_y * TILE_SIZE);
433 assert(x % TILE_VECTOR_WIDTH == 0);
434 assert(y % TILE_VECTOR_HEIGHT == 0);
435
436 assert((x % 4) == 0);
437 assert((y % 4) == 0);
438
439 /* color buffer */
440 for (i = 0; i < scene->fb.nr_cbufs; i++) {
441 stride[i] = scene->cbufs[i].stride;
442 color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y, inputs->layer);
443 }
444
445 /* depth buffer */
446 if (scene->zsbuf.map) {
447 depth_stride = scene->zsbuf.stride;
448 depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y, inputs->layer);
449 }
450
451 assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
452
453 /*
454 * The rasterizer may produce fragments outside our
455 * allocated 4x4 blocks hence need to filter them out here.
456 */
457 if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
458 /* run shader on 4x4 block */
459 BEGIN_JIT_CALL(state, task);
460 variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
461 x, y,
462 inputs->frontfacing,
463 GET_A0(inputs),
464 GET_DADX(inputs),
465 GET_DADY(inputs),
466 color,
467 depth,
468 mask,
469 &task->thread_data,
470 stride,
471 depth_stride);
472 END_JIT_CALL();
473 }
474 }
475
476
477
478 /**
479 * Begin a new occlusion query.
480 * This is a bin command put in all bins.
481 * Called per thread.
482 */
483 static void
484 lp_rast_begin_query(struct lp_rasterizer_task *task,
485 const union lp_rast_cmd_arg arg)
486 {
487 struct llvmpipe_query *pq = arg.query_obj;
488
489 assert(task->query[pq->type] == NULL);
490
491 switch (pq->type) {
492 case PIPE_QUERY_OCCLUSION_COUNTER:
493 task->thread_data.vis_counter = 0;
494 break;
495 case PIPE_QUERY_PRIMITIVES_GENERATED:
496 case PIPE_QUERY_PRIMITIVES_EMITTED:
497 case PIPE_QUERY_SO_STATISTICS:
498 case PIPE_QUERY_PIPELINE_STATISTICS:
499 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
500 break;
501 default:
502 assert(0);
503 break;
504 }
505
506 task->query[pq->type] = pq;
507 }
508
509
510 /**
511 * End the current occlusion query.
512 * This is a bin command put in all bins.
513 * Called per thread.
514 */
515 static void
516 lp_rast_end_query(struct lp_rasterizer_task *task,
517 const union lp_rast_cmd_arg arg)
518 {
519 struct llvmpipe_query *pq = arg.query_obj;
520 assert(task->query[pq->type] == pq || pq->type == PIPE_QUERY_TIMESTAMP);
521
522 switch (pq->type) {
523 case PIPE_QUERY_OCCLUSION_COUNTER:
524 pq->count[task->thread_index] += task->thread_data.vis_counter;
525 break;
526 case PIPE_QUERY_TIMESTAMP:
527 pq->count[task->thread_index] = os_time_get_nano();
528 break;
529 case PIPE_QUERY_PRIMITIVES_GENERATED:
530 case PIPE_QUERY_PRIMITIVES_EMITTED:
531 case PIPE_QUERY_SO_STATISTICS:
532 case PIPE_QUERY_PIPELINE_STATISTICS:
533 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
534 break;
535 default:
536 assert(0);
537 break;
538 }
539
540 if (task->query[pq->type] == pq) {
541 task->query[pq->type] = NULL;
542 }
543 }
544
545
546 void
547 lp_rast_set_state(struct lp_rasterizer_task *task,
548 const union lp_rast_cmd_arg arg)
549 {
550 task->state = arg.state;
551 }
552
553
554
555 /**
556 * Called when we're done writing to a color tile.
557 */
558 static void
559 lp_rast_tile_end(struct lp_rasterizer_task *task)
560 {
561 unsigned i;
562
563 for (i = 0; i < PIPE_QUERY_TYPES; ++i) {
564 if (task->query[i]) {
565 lp_rast_end_query(task, lp_rast_arg_query(task->query[i]));
566 }
567 }
568
569 /* debug */
570 memset(task->color_tiles, 0, sizeof(task->color_tiles));
571 task->depth_tile = NULL;
572
573 task->bin = NULL;
574 }
575
576 static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
577 {
578 lp_rast_clear_color,
579 lp_rast_clear_zstencil,
580 lp_rast_triangle_1,
581 lp_rast_triangle_2,
582 lp_rast_triangle_3,
583 lp_rast_triangle_4,
584 lp_rast_triangle_5,
585 lp_rast_triangle_6,
586 lp_rast_triangle_7,
587 lp_rast_triangle_8,
588 lp_rast_triangle_3_4,
589 lp_rast_triangle_3_16,
590 lp_rast_triangle_4_16,
591 lp_rast_shade_tile,
592 lp_rast_shade_tile_opaque,
593 lp_rast_begin_query,
594 lp_rast_end_query,
595 lp_rast_set_state,
596 };
597
598
599 static void
600 do_rasterize_bin(struct lp_rasterizer_task *task,
601 const struct cmd_bin *bin,
602 int x, int y)
603 {
604 const struct cmd_block *block;
605 unsigned k;
606
607 if (0)
608 lp_debug_bin(bin, x, y);
609
610 for (block = bin->head; block; block = block->next) {
611 for (k = 0; k < block->count; k++) {
612 dispatch[block->cmd[k]]( task, block->arg[k] );
613 }
614 }
615 }
616
617
618
619 /**
620 * Rasterize commands for a single bin.
621 * \param x, y position of the bin's tile in the framebuffer
622 * Must be called between lp_rast_begin() and lp_rast_end().
623 * Called per thread.
624 */
625 static void
626 rasterize_bin(struct lp_rasterizer_task *task,
627 const struct cmd_bin *bin, int x, int y )
628 {
629 lp_rast_tile_begin( task, bin, x, y );
630
631 do_rasterize_bin(task, bin, x, y);
632
633 lp_rast_tile_end(task);
634
635
636 /* Debug/Perf flags:
637 */
638 if (bin->head->count == 1) {
639 if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
640 LP_COUNT(nr_pure_shade_opaque_64);
641 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
642 LP_COUNT(nr_pure_shade_64);
643 }
644 }
645
646
647 /* An empty bin is one that just loads the contents of the tile and
648 * stores them again unchanged. This typically happens when bins have
649 * been flushed for some reason in the middle of a frame, or when
650 * incremental updates are being made to a render target.
651 *
652 * Try to avoid doing pointless work in this case.
653 */
654 static boolean
655 is_empty_bin( const struct cmd_bin *bin )
656 {
657 return bin->head == NULL;
658 }
659
660
661 /**
662 * Rasterize/execute all bins within a scene.
663 * Called per thread.
664 */
665 static void
666 rasterize_scene(struct lp_rasterizer_task *task,
667 struct lp_scene *scene)
668 {
669 task->scene = scene;
670
671 if (!task->rast->no_rast && !scene->discard) {
672 /* loop over scene bins, rasterize each */
673 {
674 struct cmd_bin *bin;
675 int i, j;
676
677 assert(scene);
678 while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
679 if (!is_empty_bin( bin ))
680 rasterize_bin(task, bin, i, j);
681 }
682 }
683 }
684
685
686 if (scene->fence) {
687 lp_fence_signal(scene->fence);
688 }
689
690 task->scene = NULL;
691 }
692
693
694 /**
695 * Called by setup module when it has something for us to render.
696 */
697 void
698 lp_rast_queue_scene( struct lp_rasterizer *rast,
699 struct lp_scene *scene)
700 {
701 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
702
703 if (rast->num_threads == 0) {
704 /* no threading */
705
706 lp_rast_begin( rast, scene );
707
708 rasterize_scene( &rast->tasks[0], scene );
709
710 lp_rast_end( rast );
711
712 rast->curr_scene = NULL;
713 }
714 else {
715 /* threaded rendering! */
716 unsigned i;
717
718 lp_scene_enqueue( rast->full_scenes, scene );
719
720 /* signal the threads that there's work to do */
721 for (i = 0; i < rast->num_threads; i++) {
722 pipe_semaphore_signal(&rast->tasks[i].work_ready);
723 }
724 }
725
726 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
727 }
728
729
730 void
731 lp_rast_finish( struct lp_rasterizer *rast )
732 {
733 if (rast->num_threads == 0) {
734 /* nothing to do */
735 }
736 else {
737 int i;
738
739 /* wait for work to complete */
740 for (i = 0; i < rast->num_threads; i++) {
741 pipe_semaphore_wait(&rast->tasks[i].work_done);
742 }
743 }
744 }
745
746
747 /**
748 * This is the thread's main entrypoint.
749 * It's a simple loop:
750 * 1. wait for work
751 * 2. do work
752 * 3. signal that we're done
753 */
754 static PIPE_THREAD_ROUTINE( thread_function, init_data )
755 {
756 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
757 struct lp_rasterizer *rast = task->rast;
758 boolean debug = false;
759
760 while (1) {
761 /* wait for work */
762 if (debug)
763 debug_printf("thread %d waiting for work\n", task->thread_index);
764 pipe_semaphore_wait(&task->work_ready);
765
766 if (rast->exit_flag)
767 break;
768
769 if (task->thread_index == 0) {
770 /* thread[0]:
771 * - get next scene to rasterize
772 * - map the framebuffer surfaces
773 */
774 lp_rast_begin( rast,
775 lp_scene_dequeue( rast->full_scenes, TRUE ) );
776 }
777
778 /* Wait for all threads to get here so that threads[1+] don't
779 * get a null rast->curr_scene pointer.
780 */
781 pipe_barrier_wait( &rast->barrier );
782
783 /* do work */
784 if (debug)
785 debug_printf("thread %d doing work\n", task->thread_index);
786
787 rasterize_scene(task,
788 rast->curr_scene);
789
790 /* wait for all threads to finish with this scene */
791 pipe_barrier_wait( &rast->barrier );
792
793 /* XXX: shouldn't be necessary:
794 */
795 if (task->thread_index == 0) {
796 lp_rast_end( rast );
797 }
798
799 /* signal done with work */
800 if (debug)
801 debug_printf("thread %d done working\n", task->thread_index);
802
803 pipe_semaphore_signal(&task->work_done);
804 }
805
806 return NULL;
807 }
808
809
810 /**
811 * Initialize semaphores and spawn the threads.
812 */
813 static void
814 create_rast_threads(struct lp_rasterizer *rast)
815 {
816 unsigned i;
817
818 /* NOTE: if num_threads is zero, we won't use any threads */
819 for (i = 0; i < rast->num_threads; i++) {
820 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
821 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
822 rast->threads[i] = pipe_thread_create(thread_function,
823 (void *) &rast->tasks[i]);
824 }
825 }
826
827
828
829 /**
830 * Create new lp_rasterizer. If num_threads is zero, don't create any
831 * new threads, do rendering synchronously.
832 * \param num_threads number of rasterizer threads to create
833 */
834 struct lp_rasterizer *
835 lp_rast_create( unsigned num_threads )
836 {
837 struct lp_rasterizer *rast;
838 unsigned i;
839
840 rast = CALLOC_STRUCT(lp_rasterizer);
841 if (!rast) {
842 goto no_rast;
843 }
844
845 rast->full_scenes = lp_scene_queue_create();
846 if (!rast->full_scenes) {
847 goto no_full_scenes;
848 }
849
850 for (i = 0; i < Elements(rast->tasks); i++) {
851 struct lp_rasterizer_task *task = &rast->tasks[i];
852 task->rast = rast;
853 task->thread_index = i;
854 }
855
856 rast->num_threads = num_threads;
857
858 rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
859
860 create_rast_threads(rast);
861
862 /* for synchronizing rasterization threads */
863 pipe_barrier_init( &rast->barrier, rast->num_threads );
864
865 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
866
867 return rast;
868
869 no_full_scenes:
870 FREE(rast);
871 no_rast:
872 return NULL;
873 }
874
875
876 /* Shutdown:
877 */
878 void lp_rast_destroy( struct lp_rasterizer *rast )
879 {
880 unsigned i;
881
882 /* Set exit_flag and signal each thread's work_ready semaphore.
883 * Each thread will be woken up, notice that the exit_flag is set and
884 * break out of its main loop. The thread will then exit.
885 */
886 rast->exit_flag = TRUE;
887 for (i = 0; i < rast->num_threads; i++) {
888 pipe_semaphore_signal(&rast->tasks[i].work_ready);
889 }
890
891 /* Wait for threads to terminate before cleaning up per-thread data */
892 for (i = 0; i < rast->num_threads; i++) {
893 pipe_thread_wait(rast->threads[i]);
894 }
895
896 /* Clean up per-thread data */
897 for (i = 0; i < rast->num_threads; i++) {
898 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
899 pipe_semaphore_destroy(&rast->tasks[i].work_done);
900 }
901
902 /* for synchronizing rasterization threads */
903 pipe_barrier_destroy( &rast->barrier );
904
905 lp_scene_queue_destroy(rast->full_scenes);
906
907 FREE(rast);
908 }
909
910
911 /** Return number of rasterization threads */
912 unsigned
913 lp_rast_get_num_threads( struct lp_rasterizer *rast )
914 {
915 return rast->num_threads;
916 }
917
918