772c9d7c681cc06684898fa2ae5751c97fc0961e
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33 #include "util/u_pack_color.h"
34
35 #include "os/os_time.h"
36
37 #include "lp_scene_queue.h"
38 #include "lp_debug.h"
39 #include "lp_fence.h"
40 #include "lp_perf.h"
41 #include "lp_query.h"
42 #include "lp_rast.h"
43 #include "lp_rast_priv.h"
44 #include "gallivm/lp_bld_debug.h"
45 #include "lp_scene.h"
46 #include "lp_tex_sample.h"
47
48
#if defined(DEBUG)
/*
 * Globals that only exist in DEBUG builds.
 * NOTE(review): presumably updated by the BEGIN_JIT_CALL()/END_JIT_CALL()
 * macros used further down to record which JIT call is in flight --
 * confirm against lp_rast_priv.h.
 */
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
#endif
54
55
56 /**
57 * Begin rasterizing a scene.
58 * Called once per scene by one thread.
59 */
60 static void
61 lp_rast_begin( struct lp_rasterizer *rast,
62 struct lp_scene *scene )
63 {
64 rast->curr_scene = scene;
65
66 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
67
68 lp_scene_begin_rasterization( scene );
69 lp_scene_bin_iter_begin( scene );
70 }
71
72
73 static void
74 lp_rast_end( struct lp_rasterizer *rast )
75 {
76 lp_scene_end_rasterization( rast->curr_scene );
77
78 rast->curr_scene = NULL;
79 }
80
81
82 /**
83 * Begining rasterization of a tile.
84 * \param x window X position of the tile, in pixels
85 * \param y window Y position of the tile, in pixels
86 */
87 static void
88 lp_rast_tile_begin(struct lp_rasterizer_task *task,
89 const struct cmd_bin *bin,
90 int x, int y)
91 {
92 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
93
94 task->bin = bin;
95 task->x = x * TILE_SIZE;
96 task->y = y * TILE_SIZE;
97 task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
98 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
99 task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
100 task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
101
102 task->thread_data.vis_counter = 0;
103 task->ps_invocations = 0;
104
105 /* reset pointers to color and depth tile(s) */
106 memset(task->color_tiles, 0, sizeof(task->color_tiles));
107 task->depth_tile = NULL;
108 }
109
110
111 /**
112 * Clear the rasterizer's current color tile.
113 * This is a bin command called during bin processing.
114 * Clear commands always clear all bound layers.
115 */
116 static void
117 lp_rast_clear_color(struct lp_rasterizer_task *task,
118 const union lp_rast_cmd_arg arg)
119 {
120 const struct lp_scene *scene = task->scene;
121
122 if (scene->fb.nr_cbufs) {
123 unsigned i;
124 union util_color uc;
125
126 if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) {
127 /*
128 * We expect int/uint clear values here, though some APIs
129 * might disagree (but in any case util_pack_color()
130 * couldn't handle it)...
131 */
132 LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
133 arg.clear_color.ui[0],
134 arg.clear_color.ui[1],
135 arg.clear_color.ui[2],
136 arg.clear_color.ui[3]);
137
138 for (i = 0; i < scene->fb.nr_cbufs; i++) {
139 enum pipe_format format = scene->fb.cbufs[i]->format;
140
141 if (util_format_is_pure_sint(format)) {
142 util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
143 }
144 else {
145 assert(util_format_is_pure_uint(format));
146 util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
147 }
148
149 util_fill_box(scene->cbufs[i].map,
150 format,
151 scene->cbufs[i].stride,
152 scene->cbufs[i].layer_stride,
153 task->x,
154 task->y,
155 0,
156 task->width,
157 task->height,
158 scene->fb_max_layer + 1,
159 &uc);
160 }
161 }
162 else {
163 uint8_t clear_color[4];
164
165 for (i = 0; i < 4; ++i) {
166 clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
167 }
168
169 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
170 clear_color[0],
171 clear_color[1],
172 clear_color[2],
173 clear_color[3]);
174
175 for (i = 0; i < scene->fb.nr_cbufs; i++) {
176 util_pack_color(arg.clear_color.f,
177 scene->fb.cbufs[i]->format, &uc);
178
179 util_fill_box(scene->cbufs[i].map,
180 scene->fb.cbufs[i]->format,
181 scene->cbufs[i].stride,
182 scene->cbufs[i].layer_stride,
183 task->x,
184 task->y,
185 0,
186 task->width,
187 task->height,
188 scene->fb_max_layer + 1,
189 &uc);
190 }
191 }
192 }
193
194 LP_COUNT(nr_color_tile_clear);
195 }
196
197
198
199
200 /**
201 * Clear the rasterizer's current z/stencil tile.
202 * This is a bin command called during bin processing.
203 * Clear commands always clear all bound layers.
204 */
205 static void
206 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
207 const union lp_rast_cmd_arg arg)
208 {
209 const struct lp_scene *scene = task->scene;
210 uint64_t clear_value64 = arg.clear_zstencil.value;
211 uint64_t clear_mask64 = arg.clear_zstencil.mask;
212 uint32_t clear_value = (uint32_t) clear_value64;
213 uint32_t clear_mask = (uint32_t) clear_mask64;
214 const unsigned height = task->height;
215 const unsigned width = task->width;
216 const unsigned dst_stride = scene->zsbuf.stride;
217 uint8_t *dst;
218 unsigned i, j;
219 unsigned block_size;
220
221 LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
222 __FUNCTION__, clear_value, clear_mask);
223
224 /*
225 * Clear the area of the depth/depth buffer matching this tile.
226 */
227
228 if (scene->fb.zsbuf) {
229 unsigned layer;
230 uint8_t *dst_layer = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
231 block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
232
233 clear_value &= clear_mask;
234
235 for (layer = 0; layer <= scene->fb_max_layer; layer++) {
236 dst = dst_layer;
237
238 switch (block_size) {
239 case 1:
240 assert(clear_mask == 0xff);
241 memset(dst, (uint8_t) clear_value, height * width);
242 break;
243 case 2:
244 if (clear_mask == 0xffff) {
245 for (i = 0; i < height; i++) {
246 uint16_t *row = (uint16_t *)dst;
247 for (j = 0; j < width; j++)
248 *row++ = (uint16_t) clear_value;
249 dst += dst_stride;
250 }
251 }
252 else {
253 for (i = 0; i < height; i++) {
254 uint16_t *row = (uint16_t *)dst;
255 for (j = 0; j < width; j++) {
256 uint16_t tmp = ~clear_mask & *row;
257 *row++ = clear_value | tmp;
258 }
259 dst += dst_stride;
260 }
261 }
262 break;
263 case 4:
264 if (clear_mask == 0xffffffff) {
265 for (i = 0; i < height; i++) {
266 uint32_t *row = (uint32_t *)dst;
267 for (j = 0; j < width; j++)
268 *row++ = clear_value;
269 dst += dst_stride;
270 }
271 }
272 else {
273 for (i = 0; i < height; i++) {
274 uint32_t *row = (uint32_t *)dst;
275 for (j = 0; j < width; j++) {
276 uint32_t tmp = ~clear_mask & *row;
277 *row++ = clear_value | tmp;
278 }
279 dst += dst_stride;
280 }
281 }
282 break;
283 case 8:
284 clear_value64 &= clear_mask64;
285 if (clear_mask64 == 0xffffffffffULL) {
286 for (i = 0; i < height; i++) {
287 uint64_t *row = (uint64_t *)dst;
288 for (j = 0; j < width; j++)
289 *row++ = clear_value64;
290 dst += dst_stride;
291 }
292 }
293 else {
294 for (i = 0; i < height; i++) {
295 uint64_t *row = (uint64_t *)dst;
296 for (j = 0; j < width; j++) {
297 uint64_t tmp = ~clear_mask64 & *row;
298 *row++ = clear_value64 | tmp;
299 }
300 dst += dst_stride;
301 }
302 }
303 break;
304
305 default:
306 assert(0);
307 break;
308 }
309 dst_layer += scene->zsbuf.layer_stride;
310 }
311 }
312 }
313
314
315
316 /**
317 * Run the shader on all blocks in a tile. This is used when a tile is
318 * completely contained inside a triangle.
319 * This is a bin command called during bin processing.
320 */
321 static void
322 lp_rast_shade_tile(struct lp_rasterizer_task *task,
323 const union lp_rast_cmd_arg arg)
324 {
325 const struct lp_scene *scene = task->scene;
326 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
327 const struct lp_rast_state *state;
328 struct lp_fragment_shader_variant *variant;
329 const unsigned tile_x = task->x, tile_y = task->y;
330 unsigned x, y;
331
332 if (inputs->disable) {
333 /* This command was partially binned and has been disabled */
334 return;
335 }
336
337 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
338
339 state = task->state;
340 assert(state);
341 if (!state) {
342 return;
343 }
344 variant = state->variant;
345
346 /* render the whole 64x64 tile in 4x4 chunks */
347 for (y = 0; y < task->height; y += 4){
348 for (x = 0; x < task->width; x += 4) {
349 uint8_t *color[PIPE_MAX_COLOR_BUFS];
350 unsigned stride[PIPE_MAX_COLOR_BUFS];
351 uint8_t *depth = NULL;
352 unsigned depth_stride = 0;
353 unsigned i;
354
355 /* color buffer */
356 for (i = 0; i < scene->fb.nr_cbufs; i++){
357 stride[i] = scene->cbufs[i].stride;
358 color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x,
359 tile_y + y, inputs->layer);
360 }
361
362 /* depth buffer */
363 if (scene->zsbuf.map) {
364 depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x,
365 tile_y + y, inputs->layer);
366 depth_stride = scene->zsbuf.stride;
367 }
368
369 /* run shader on 4x4 block */
370 BEGIN_JIT_CALL(state, task);
371 variant->jit_function[RAST_WHOLE]( &state->jit_context,
372 tile_x + x, tile_y + y,
373 inputs->frontfacing,
374 GET_A0(inputs),
375 GET_DADX(inputs),
376 GET_DADY(inputs),
377 color,
378 depth,
379 0xffff,
380 &task->thread_data,
381 stride,
382 depth_stride);
383 END_JIT_CALL();
384 }
385 }
386 }
387
388
389 /**
390 * Run the shader on all blocks in a tile. This is used when a tile is
391 * completely contained inside a triangle, and the shader is opaque.
392 * This is a bin command called during bin processing.
393 */
394 static void
395 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
396 const union lp_rast_cmd_arg arg)
397 {
398 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
399
400 assert(task->state);
401 if (!task->state) {
402 return;
403 }
404
405 lp_rast_shade_tile(task, arg);
406 }
407
408
409 /**
410 * Compute shading for a 4x4 block of pixels inside a triangle.
411 * This is a bin command called during bin processing.
412 * \param x X position of quad in window coords
413 * \param y Y position of quad in window coords
414 */
415 void
416 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
417 const struct lp_rast_shader_inputs *inputs,
418 unsigned x, unsigned y,
419 unsigned mask)
420 {
421 const struct lp_rast_state *state = task->state;
422 struct lp_fragment_shader_variant *variant = state->variant;
423 const struct lp_scene *scene = task->scene;
424 uint8_t *color[PIPE_MAX_COLOR_BUFS];
425 unsigned stride[PIPE_MAX_COLOR_BUFS];
426 uint8_t *depth = NULL;
427 unsigned depth_stride = 0;
428 unsigned i;
429
430 assert(state);
431
432 /* Sanity checks */
433 assert(x < scene->tiles_x * TILE_SIZE);
434 assert(y < scene->tiles_y * TILE_SIZE);
435 assert(x % TILE_VECTOR_WIDTH == 0);
436 assert(y % TILE_VECTOR_HEIGHT == 0);
437
438 assert((x % 4) == 0);
439 assert((y % 4) == 0);
440
441 /* color buffer */
442 for (i = 0; i < scene->fb.nr_cbufs; i++) {
443 stride[i] = scene->cbufs[i].stride;
444 color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y, inputs->layer);
445 }
446
447 /* depth buffer */
448 if (scene->zsbuf.map) {
449 depth_stride = scene->zsbuf.stride;
450 depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y, inputs->layer);
451 }
452
453 assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
454
455 /*
456 * The rasterizer may produce fragments outside our
457 * allocated 4x4 blocks hence need to filter them out here.
458 */
459 if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
460 /* not very accurate would need a popcount on the mask */
461 /* always count this not worth bothering? */
462 task->ps_invocations++;
463
464 /* run shader on 4x4 block */
465 BEGIN_JIT_CALL(state, task);
466 variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
467 x, y,
468 inputs->frontfacing,
469 GET_A0(inputs),
470 GET_DADX(inputs),
471 GET_DADY(inputs),
472 color,
473 depth,
474 mask,
475 &task->thread_data,
476 stride,
477 depth_stride);
478 END_JIT_CALL();
479 }
480 }
481
482
483
484 /**
485 * Begin a new occlusion query.
486 * This is a bin command put in all bins.
487 * Called per thread.
488 */
489 static void
490 lp_rast_begin_query(struct lp_rasterizer_task *task,
491 const union lp_rast_cmd_arg arg)
492 {
493 struct llvmpipe_query *pq = arg.query_obj;
494
495 switch (pq->type) {
496 case PIPE_QUERY_OCCLUSION_COUNTER:
497 case PIPE_QUERY_OCCLUSION_PREDICATE:
498 pq->start[task->thread_index] = task->thread_data.vis_counter;
499 break;
500 case PIPE_QUERY_PIPELINE_STATISTICS:
501 pq->start[task->thread_index] = task->ps_invocations;
502 break;
503 default:
504 assert(0);
505 break;
506 }
507 }
508
509
510 /**
511 * End the current occlusion query.
512 * This is a bin command put in all bins.
513 * Called per thread.
514 */
515 static void
516 lp_rast_end_query(struct lp_rasterizer_task *task,
517 const union lp_rast_cmd_arg arg)
518 {
519 struct llvmpipe_query *pq = arg.query_obj;
520
521 switch (pq->type) {
522 case PIPE_QUERY_OCCLUSION_COUNTER:
523 case PIPE_QUERY_OCCLUSION_PREDICATE:
524 pq->end[task->thread_index] +=
525 task->thread_data.vis_counter - pq->start[task->thread_index];
526 pq->start[task->thread_index] = 0;
527 break;
528 case PIPE_QUERY_TIMESTAMP:
529 pq->end[task->thread_index] = os_time_get_nano();
530 break;
531 case PIPE_QUERY_PIPELINE_STATISTICS:
532 pq->end[task->thread_index] +=
533 task->ps_invocations - pq->start[task->thread_index];
534 pq->start[task->thread_index] = 0;
535 break;
536 default:
537 assert(0);
538 break;
539 }
540 }
541
542
543 void
544 lp_rast_set_state(struct lp_rasterizer_task *task,
545 const union lp_rast_cmd_arg arg)
546 {
547 task->state = arg.state;
548 }
549
550
551
552 /**
553 * Called when we're done writing to a color tile.
554 */
555 static void
556 lp_rast_tile_end(struct lp_rasterizer_task *task)
557 {
558 unsigned i;
559
560 for (i = 0; i < task->scene->num_active_queries; ++i) {
561 lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
562 }
563
564 /* debug */
565 memset(task->color_tiles, 0, sizeof(task->color_tiles));
566 task->depth_tile = NULL;
567
568 task->bin = NULL;
569 }
570
571 static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
572 {
573 lp_rast_clear_color,
574 lp_rast_clear_zstencil,
575 lp_rast_triangle_1,
576 lp_rast_triangle_2,
577 lp_rast_triangle_3,
578 lp_rast_triangle_4,
579 lp_rast_triangle_5,
580 lp_rast_triangle_6,
581 lp_rast_triangle_7,
582 lp_rast_triangle_8,
583 lp_rast_triangle_3_4,
584 lp_rast_triangle_3_16,
585 lp_rast_triangle_4_16,
586 lp_rast_shade_tile,
587 lp_rast_shade_tile_opaque,
588 lp_rast_begin_query,
589 lp_rast_end_query,
590 lp_rast_set_state,
591 };
592
593
594 static void
595 do_rasterize_bin(struct lp_rasterizer_task *task,
596 const struct cmd_bin *bin,
597 int x, int y)
598 {
599 const struct cmd_block *block;
600 unsigned k;
601
602 if (0)
603 lp_debug_bin(bin, x, y);
604
605 for (block = bin->head; block; block = block->next) {
606 for (k = 0; k < block->count; k++) {
607 dispatch[block->cmd[k]]( task, block->arg[k] );
608 }
609 }
610 }
611
612
613
614 /**
615 * Rasterize commands for a single bin.
616 * \param x, y position of the bin's tile in the framebuffer
617 * Must be called between lp_rast_begin() and lp_rast_end().
618 * Called per thread.
619 */
620 static void
621 rasterize_bin(struct lp_rasterizer_task *task,
622 const struct cmd_bin *bin, int x, int y )
623 {
624 lp_rast_tile_begin( task, bin, x, y );
625
626 do_rasterize_bin(task, bin, x, y);
627
628 lp_rast_tile_end(task);
629
630
631 /* Debug/Perf flags:
632 */
633 if (bin->head->count == 1) {
634 if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
635 LP_COUNT(nr_pure_shade_opaque_64);
636 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
637 LP_COUNT(nr_pure_shade_64);
638 }
639 }
640
641
642 /* An empty bin is one that just loads the contents of the tile and
643 * stores them again unchanged. This typically happens when bins have
644 * been flushed for some reason in the middle of a frame, or when
645 * incremental updates are being made to a render target.
646 *
647 * Try to avoid doing pointless work in this case.
648 */
649 static boolean
650 is_empty_bin( const struct cmd_bin *bin )
651 {
652 return bin->head == NULL;
653 }
654
655
656 /**
657 * Rasterize/execute all bins within a scene.
658 * Called per thread.
659 */
660 static void
661 rasterize_scene(struct lp_rasterizer_task *task,
662 struct lp_scene *scene)
663 {
664 task->scene = scene;
665
666 if (!task->rast->no_rast && !scene->discard) {
667 /* loop over scene bins, rasterize each */
668 {
669 struct cmd_bin *bin;
670 int i, j;
671
672 assert(scene);
673 while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
674 if (!is_empty_bin( bin ))
675 rasterize_bin(task, bin, i, j);
676 }
677 }
678 }
679
680
681 if (scene->fence) {
682 lp_fence_signal(scene->fence);
683 }
684
685 task->scene = NULL;
686 }
687
688
689 /**
690 * Called by setup module when it has something for us to render.
691 */
692 void
693 lp_rast_queue_scene( struct lp_rasterizer *rast,
694 struct lp_scene *scene)
695 {
696 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
697
698 if (rast->num_threads == 0) {
699 /* no threading */
700
701 lp_rast_begin( rast, scene );
702
703 rasterize_scene( &rast->tasks[0], scene );
704
705 lp_rast_end( rast );
706
707 rast->curr_scene = NULL;
708 }
709 else {
710 /* threaded rendering! */
711 unsigned i;
712
713 lp_scene_enqueue( rast->full_scenes, scene );
714
715 /* signal the threads that there's work to do */
716 for (i = 0; i < rast->num_threads; i++) {
717 pipe_semaphore_signal(&rast->tasks[i].work_ready);
718 }
719 }
720
721 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
722 }
723
724
725 void
726 lp_rast_finish( struct lp_rasterizer *rast )
727 {
728 if (rast->num_threads == 0) {
729 /* nothing to do */
730 }
731 else {
732 int i;
733
734 /* wait for work to complete */
735 for (i = 0; i < rast->num_threads; i++) {
736 pipe_semaphore_wait(&rast->tasks[i].work_done);
737 }
738 }
739 }
740
741
742 /**
743 * This is the thread's main entrypoint.
744 * It's a simple loop:
745 * 1. wait for work
746 * 2. do work
747 * 3. signal that we're done
748 */
749 static PIPE_THREAD_ROUTINE( thread_function, init_data )
750 {
751 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
752 struct lp_rasterizer *rast = task->rast;
753 boolean debug = false;
754 unsigned fpstate = util_fpstate_get();
755
756 /* Make sure that denorms are treated like zeros. This is
757 * the behavior required by D3D10. OpenGL doesn't care.
758 */
759 util_fpstate_set_denorms_to_zero(fpstate);
760
761 while (1) {
762 /* wait for work */
763 if (debug)
764 debug_printf("thread %d waiting for work\n", task->thread_index);
765 pipe_semaphore_wait(&task->work_ready);
766
767 if (rast->exit_flag)
768 break;
769
770 if (task->thread_index == 0) {
771 /* thread[0]:
772 * - get next scene to rasterize
773 * - map the framebuffer surfaces
774 */
775 lp_rast_begin( rast,
776 lp_scene_dequeue( rast->full_scenes, TRUE ) );
777 }
778
779 /* Wait for all threads to get here so that threads[1+] don't
780 * get a null rast->curr_scene pointer.
781 */
782 pipe_barrier_wait( &rast->barrier );
783
784 /* do work */
785 if (debug)
786 debug_printf("thread %d doing work\n", task->thread_index);
787
788 rasterize_scene(task,
789 rast->curr_scene);
790
791 /* wait for all threads to finish with this scene */
792 pipe_barrier_wait( &rast->barrier );
793
794 /* XXX: shouldn't be necessary:
795 */
796 if (task->thread_index == 0) {
797 lp_rast_end( rast );
798 }
799
800 /* signal done with work */
801 if (debug)
802 debug_printf("thread %d done working\n", task->thread_index);
803
804 pipe_semaphore_signal(&task->work_done);
805 }
806
807 return NULL;
808 }
809
810
811 /**
812 * Initialize semaphores and spawn the threads.
813 */
814 static void
815 create_rast_threads(struct lp_rasterizer *rast)
816 {
817 unsigned i;
818
819 /* NOTE: if num_threads is zero, we won't use any threads */
820 for (i = 0; i < rast->num_threads; i++) {
821 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
822 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
823 rast->threads[i] = pipe_thread_create(thread_function,
824 (void *) &rast->tasks[i]);
825 }
826 }
827
828
829
830 /**
831 * Create new lp_rasterizer. If num_threads is zero, don't create any
832 * new threads, do rendering synchronously.
833 * \param num_threads number of rasterizer threads to create
834 */
835 struct lp_rasterizer *
836 lp_rast_create( unsigned num_threads )
837 {
838 struct lp_rasterizer *rast;
839 unsigned i;
840
841 rast = CALLOC_STRUCT(lp_rasterizer);
842 if (!rast) {
843 goto no_rast;
844 }
845
846 rast->full_scenes = lp_scene_queue_create();
847 if (!rast->full_scenes) {
848 goto no_full_scenes;
849 }
850
851 for (i = 0; i < Elements(rast->tasks); i++) {
852 struct lp_rasterizer_task *task = &rast->tasks[i];
853 task->rast = rast;
854 task->thread_index = i;
855 }
856
857 rast->num_threads = num_threads;
858
859 rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
860
861 create_rast_threads(rast);
862
863 /* for synchronizing rasterization threads */
864 pipe_barrier_init( &rast->barrier, rast->num_threads );
865
866 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
867
868 return rast;
869
870 no_full_scenes:
871 FREE(rast);
872 no_rast:
873 return NULL;
874 }
875
876
877 /* Shutdown:
878 */
879 void lp_rast_destroy( struct lp_rasterizer *rast )
880 {
881 unsigned i;
882
883 /* Set exit_flag and signal each thread's work_ready semaphore.
884 * Each thread will be woken up, notice that the exit_flag is set and
885 * break out of its main loop. The thread will then exit.
886 */
887 rast->exit_flag = TRUE;
888 for (i = 0; i < rast->num_threads; i++) {
889 pipe_semaphore_signal(&rast->tasks[i].work_ready);
890 }
891
892 /* Wait for threads to terminate before cleaning up per-thread data */
893 for (i = 0; i < rast->num_threads; i++) {
894 pipe_thread_wait(rast->threads[i]);
895 }
896
897 /* Clean up per-thread data */
898 for (i = 0; i < rast->num_threads; i++) {
899 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
900 pipe_semaphore_destroy(&rast->tasks[i].work_done);
901 }
902
903 /* for synchronizing rasterization threads */
904 pipe_barrier_destroy( &rast->barrier );
905
906 lp_scene_queue_destroy(rast->full_scenes);
907
908 FREE(rast);
909 }
910
911