llvmpipe: implement lp_rast_load_zstencil
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_rast.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
41 #include "lp_scene.h"
42
43
44 /**
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
47 */
48 static boolean
49 lp_rast_begin( struct lp_rasterizer *rast,
50 const struct pipe_framebuffer_state *fb,
51 boolean write_color,
52 boolean write_zstencil )
53 {
54 struct pipe_screen *screen = rast->screen;
55 struct pipe_surface *cbuf, *zsbuf;
56 int i;
57
58 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
59
60 util_copy_framebuffer_state(&rast->state.fb, fb);
61
62 rast->state.write_zstencil = write_zstencil;
63 rast->state.write_color = write_color;
64
65 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
66 fb->height % TILE_SIZE != 0);
67
68
69 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
70 cbuf = rast->state.fb.cbufs[i];
71 if (cbuf) {
72 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
73 cbuf->texture,
74 cbuf->face,
75 cbuf->level,
76 cbuf->zslice,
77 PIPE_TRANSFER_READ_WRITE,
78 0, 0,
79 cbuf->width,
80 cbuf->height);
81 if (!rast->cbuf_transfer[i])
82 goto fail;
83
84 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
85 rast->cbuf_transfer[i]);
86 if (!rast->cbuf_map[i])
87 goto fail;
88 }
89 }
90
91 zsbuf = rast->state.fb.zsbuf;
92 if (zsbuf) {
93 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
94 zsbuf->texture,
95 zsbuf->face,
96 zsbuf->level,
97 zsbuf->zslice,
98 PIPE_TRANSFER_READ_WRITE,
99 0, 0,
100 zsbuf->width,
101 zsbuf->height);
102 if (!rast->zsbuf_transfer)
103 goto fail;
104
105 rast->zsbuf_map = screen->transfer_map(rast->screen,
106 rast->zsbuf_transfer);
107 if (!rast->zsbuf_map)
108 goto fail;
109 }
110
111 return TRUE;
112
113 fail:
114 /* Unmap and release transfers?
115 */
116 return FALSE;
117 }
118
119
120 /**
121 * Finish the rasterization phase.
122 * Unmap framebuffer surfaces.
123 */
124 static void
125 lp_rast_end( struct lp_rasterizer *rast )
126 {
127 struct pipe_screen *screen = rast->screen;
128 unsigned i;
129
130 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
131 if (rast->cbuf_map[i])
132 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
133
134 if (rast->cbuf_transfer[i])
135 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
136
137 rast->cbuf_transfer[i] = NULL;
138 rast->cbuf_map[i] = NULL;
139 }
140
141 if (rast->zsbuf_map)
142 screen->transfer_unmap(screen, rast->zsbuf_transfer);
143
144 if (rast->zsbuf_transfer)
145 screen->tex_transfer_destroy(rast->zsbuf_transfer);
146
147 rast->zsbuf_transfer = NULL;
148 rast->zsbuf_map = NULL;
149 }
150
151
152 /**
153 * Begining rasterization of a tile.
154 * \param x window X position of the tile, in pixels
155 * \param y window Y position of the tile, in pixels
156 */
157 static void
158 lp_rast_start_tile( struct lp_rasterizer *rast,
159 unsigned thread_index,
160 unsigned x, unsigned y )
161 {
162 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
163
164 rast->tasks[thread_index].x = x;
165 rast->tasks[thread_index].y = y;
166 }
167
168
169 /**
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
172 */
173 void lp_rast_clear_color( struct lp_rasterizer *rast,
174 unsigned thread_index,
175 const union lp_rast_cmd_arg arg )
176 {
177 const uint8_t *clear_color = arg.clear_color;
178 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
179 unsigned i;
180
181 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
182 clear_color[0],
183 clear_color[1],
184 clear_color[2],
185 clear_color[3]);
186
187 if (clear_color[0] == clear_color[1] &&
188 clear_color[1] == clear_color[2] &&
189 clear_color[2] == clear_color[3]) {
190 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
191 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
192 }
193 }
194 else {
195 unsigned x, y, chan;
196 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
197 for (y = 0; y < TILE_SIZE; y++)
198 for (x = 0; x < TILE_SIZE; x++)
199 for (chan = 0; chan < 4; ++chan)
200 TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan];
201 }
202 }
203
204
205 /**
206 * Clear the rasterizer's current z/stencil tile.
207 * This is a bin command called during bin processing.
208 */
209 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
210 unsigned thread_index,
211 const union lp_rast_cmd_arg arg)
212 {
213 unsigned i, j;
214 uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
215
216 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
217
218 for (i = 0; i < TILE_SIZE; i++)
219 for (j = 0; j < TILE_SIZE; j++)
220 depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil;
221 }
222
223
224 /**
225 * Load tile color from the framebuffer surface.
226 * This is a bin command called during bin processing.
227 */
228 void lp_rast_load_color( struct lp_rasterizer *rast,
229 unsigned thread_index,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
233 const unsigned x = task->x;
234 const unsigned y = task->y;
235 unsigned i;
236
237 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
238
239 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
240 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
241 int w = TILE_SIZE;
242 int h = TILE_SIZE;
243
244 if (x >= transfer->width)
245 continue;
246
247 if (y >= transfer->height)
248 continue;
249 /* XXX: require tile-size aligned render target dimensions:
250 */
251 if (x + w > transfer->width)
252 w -= x + w - transfer->width;
253
254 if (y + h > transfer->height)
255 h -= y + h - transfer->height;
256
257 assert(w >= 0);
258 assert(h >= 0);
259 assert(w <= TILE_SIZE);
260 assert(h <= TILE_SIZE);
261
262 lp_tile_read_4ub(transfer->texture->format,
263 rast->tasks[thread_index].tile.color[i],
264 rast->cbuf_map[i],
265 transfer->stride,
266 x, y,
267 w, h);
268 }
269 }
270
271
/**
 * Copy a w x h rectangle of 32-bit values out of a mapped surface.
 *
 * \param tile        destination; receives w*h values, written back to
 *                    back (row r of the rectangle starts at tile[r*w])
 * \param map         start of the mapped surface
 * \param map_stride  distance between surface rows, in bytes
 * \param x0, y0      top-left corner of the rectangle, in pixels
 * \param w, h        rectangle size, in pixels
 */
static void
lp_tile_read_z32(uint32_t *tile,
                 const uint8_t *map,
                 unsigned map_stride,
                 unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const uint8_t *src_row = map + y0*map_stride;
   unsigned row, col;

   for (row = 0; row < h; row++) {
      /* 4 bytes per pixel */
      const uint32_t *src = (const uint32_t *)(src_row + x0*4);

      for (col = 0; col < w; col++) {
         tile[row * w + col] = src[col];
      }

      src_row += map_stride;
   }
}
288
289 /**
290 * Load tile z/stencil from the framebuffer surface.
291 * This is a bin command called during bin processing.
292 */
293 void lp_rast_load_zstencil( struct lp_rasterizer *rast,
294 unsigned thread_index,
295 const union lp_rast_cmd_arg arg )
296 {
297 const unsigned x = rast->tasks[thread_index].x;
298 const unsigned y = rast->tasks[thread_index].y;
299 unsigned w = TILE_SIZE;
300 unsigned h = TILE_SIZE;
301
302 if (x + w > rast->state.fb.width)
303 w -= x + w - rast->state.fb.width;
304
305 if (y + h > rast->state.fb.height)
306 h -= y + h - rast->state.fb.height;
307
308 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
309
310 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
311 lp_tile_read_z32(rast->tasks[thread_index].tile.depth,
312 rast->zsbuf_map,
313 rast->zsbuf_transfer->stride,
314 x, y, w, h);
315 }
316
317
318 void lp_rast_set_state( struct lp_rasterizer *rast,
319 unsigned thread_index,
320 const union lp_rast_cmd_arg arg )
321 {
322 const struct lp_rast_state *state = arg.set_state;
323
324 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
325
326 /* just set the current state pointer for this rasterizer */
327 rast->tasks[thread_index].current_state = state;
328 }
329
330
331
332 /* Within a tile:
333 */
334
335 /**
336 * Run the shader on all blocks in a tile. This is used when a tile is
337 * completely contained inside a triangle.
338 * This is a bin command called during bin processing.
339 */
340 void lp_rast_shade_tile( struct lp_rasterizer *rast,
341 unsigned thread_index,
342 const union lp_rast_cmd_arg arg )
343 {
344 /* Set c1,c2,c3 to large values so the in/out test always passes */
345 const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
346 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
347 const unsigned tile_x = rast->tasks[thread_index].x;
348 const unsigned tile_y = rast->tasks[thread_index].y;
349 unsigned x, y;
350
351 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
352
353 /* Use the existing preference for 4x4 (four quads) shading:
354 */
355 for (y = 0; y < TILE_SIZE; y += 4)
356 for (x = 0; x < TILE_SIZE; x += 4)
357 lp_rast_shade_quads( rast,
358 thread_index,
359 inputs,
360 tile_x + x,
361 tile_y + y,
362 c1, c2, c3);
363 }
364
365
366 /**
367 * Compute shading for a 4x4 block of pixels.
368 * This is a bin command called during bin processing.
369 */
370 void lp_rast_shade_quads( struct lp_rasterizer *rast,
371 unsigned thread_index,
372 const struct lp_rast_shader_inputs *inputs,
373 unsigned x, unsigned y,
374 int32_t c1, int32_t c2, int32_t c3)
375 {
376 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
377 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
378 uint8_t *color[PIPE_MAX_COLOR_BUFS];
379 void *depth;
380 unsigned i;
381 unsigned ix, iy;
382 int block_offset;
383
384 #ifdef DEBUG
385 assert(state);
386
387 /* Sanity checks */
388 assert(x % TILE_VECTOR_WIDTH == 0);
389 assert(y % TILE_VECTOR_HEIGHT == 0);
390
391 assert((x % 4) == 0);
392 assert((y % 4) == 0);
393 #endif
394
395 ix = x % TILE_SIZE;
396 iy = y % TILE_SIZE;
397
398 /* offset of the 16x16 pixel block within the tile */
399 block_offset = ((iy/4)*(16*16) + (ix/4)*16);
400
401 /* color buffer */
402 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
403 color[i] = tile->color[i] + 4 * block_offset;
404
405 /* depth buffer */
406 depth = tile->depth + block_offset;
407
408
409
410 #ifdef DEBUG
411 assert(lp_check_alignment(tile->depth, 16));
412 assert(lp_check_alignment(tile->color[0], 16));
413 assert(lp_check_alignment(state->jit_context.blend_color, 16));
414
415 assert(lp_check_alignment(inputs->step[0], 16));
416 assert(lp_check_alignment(inputs->step[1], 16));
417 assert(lp_check_alignment(inputs->step[2], 16));
418 #endif
419
420 /* run shader */
421 state->jit_function( &state->jit_context,
422 x, y,
423 inputs->a0,
424 inputs->dadx,
425 inputs->dady,
426 color,
427 depth,
428 c1, c2, c3,
429 inputs->step[0], inputs->step[1], inputs->step[2]);
430 }
431
432
433 /* End of tile:
434 */
435
436
437 /**
438 * Write the rasterizer's color tile to the framebuffer.
439 */
440 static void lp_rast_store_color( struct lp_rasterizer *rast,
441 unsigned thread_index)
442 {
443 const unsigned x = rast->tasks[thread_index].x;
444 const unsigned y = rast->tasks[thread_index].y;
445 unsigned i;
446
447 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
448 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
449 int w = TILE_SIZE;
450 int h = TILE_SIZE;
451
452 if (x >= transfer->width)
453 continue;
454
455 if (y >= transfer->height)
456 continue;
457
458 /* XXX: require tile-size aligned render target dimensions:
459 */
460 if (x + w > transfer->width)
461 w -= x + w - transfer->width;
462
463 if (y + h > transfer->height)
464 h -= y + h - transfer->height;
465
466 assert(w >= 0);
467 assert(h >= 0);
468 assert(w <= TILE_SIZE);
469 assert(h <= TILE_SIZE);
470
471 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
472 thread_index, x, y, w, h);
473
474 lp_tile_write_4ub(transfer->texture->format,
475 rast->tasks[thread_index].tile.color[i],
476 rast->cbuf_map[i],
477 transfer->stride,
478 x, y,
479 w, h);
480 }
481 }
482
483
/**
 * Copy w*h 32-bit values into a w x h rectangle of a mapped surface.
 *
 * \param src         source values, read back to back (row r of the
 *                    rectangle is taken from src[r*w] onwards)
 * \param dst         start of the mapped surface
 * \param dst_stride  distance between surface rows, in bytes
 * \param x0, y0      top-left corner of the rectangle, in pixels
 * \param w, h        rectangle size, in pixels
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   uint8_t *row_start = dst + y0*dst_stride;
   unsigned row, col;

   for (row = 0; row < h; row++) {
      /* 4 bytes per pixel */
      uint32_t *out = (uint32_t *)(row_start + x0*4);

      for (col = 0; col < w; col++) {
         out[col] = src[row * w + col];
      }

      row_start += dst_stride;
   }
}
498
499 /**
500 * Write the rasterizer's z/stencil tile to the framebuffer.
501 */
502 static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
503 unsigned thread_index )
504 {
505 const unsigned x = rast->tasks[thread_index].x;
506 const unsigned y = rast->tasks[thread_index].y;
507 unsigned w = TILE_SIZE;
508 unsigned h = TILE_SIZE;
509
510 if (x + w > rast->state.fb.width)
511 w -= x + w - rast->state.fb.width;
512
513 if (y + h > rast->state.fb.height)
514 h -= y + h - rast->state.fb.height;
515
516 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
517
518 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
519 lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
520 rast->zsbuf_map,
521 rast->zsbuf_transfer->stride,
522 x, y, w, h);
523 }
524
525
526 /**
527 * Write the rasterizer's tiles to the framebuffer.
528 */
529 static void
530 lp_rast_end_tile( struct lp_rasterizer *rast,
531 unsigned thread_index )
532 {
533 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
534
535 if (rast->state.write_color)
536 lp_rast_store_color(rast, thread_index);
537
538 if (rast->state.write_zstencil)
539 lp_rast_store_zstencil(rast, thread_index);
540 }
541
542
543 /**
544 * Signal on a fence. This is called during bin execution/rasterization.
545 * Called per thread.
546 */
547 void lp_rast_fence( struct lp_rasterizer *rast,
548 unsigned thread_index,
549 const union lp_rast_cmd_arg arg )
550 {
551 struct lp_fence *fence = arg.fence;
552
553 pipe_mutex_lock( fence->mutex );
554
555 fence->count++;
556 assert(fence->count <= fence->rank);
557
558 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
559 fence->count, fence->rank);
560
561 pipe_condvar_signal( fence->signalled );
562
563 pipe_mutex_unlock( fence->mutex );
564 }
565
566
567 /**
568 * When all the threads are done rasterizing a scene, one thread will
569 * call this function to reset the scene and put it onto the empty queue.
570 */
571 static void
572 release_scene( struct lp_rasterizer *rast,
573 struct lp_scene *scene )
574 {
575 util_unreference_framebuffer_state( &scene->fb );
576
577 lp_scene_reset( scene );
578 lp_scene_enqueue( rast->empty_scenes, scene );
579 rast->curr_scene = NULL;
580 }
581
582
583 /**
584 * Rasterize commands for a single bin.
585 * \param x, y position of the bin's tile in the framebuffer
586 * Must be called between lp_rast_begin() and lp_rast_end().
587 * Called per thread.
588 */
589 static void
590 rasterize_bin( struct lp_rasterizer *rast,
591 unsigned thread_index,
592 const struct cmd_bin *bin,
593 int x, int y)
594 {
595 const struct cmd_block_list *commands = &bin->commands;
596 struct cmd_block *block;
597 unsigned k;
598
599 lp_rast_start_tile( rast, thread_index, x, y );
600
601 /* simply execute each of the commands in the block list */
602 for (block = commands->head; block; block = block->next) {
603 for (k = 0; k < block->count; k++) {
604 block->cmd[k]( rast, thread_index, block->arg[k] );
605 }
606 }
607
608 lp_rast_end_tile( rast, thread_index );
609 }
610
611 /* An empty bin is one that just loads the contents of the tile and
612 * stores them again unchanged. This typically happens when bins have
613 * been flushed for some reason in the middle of a frame, or when
614 * incremental updates are being made to a render target.
615 *
616 * Try to avoid doing pointless work in this case.
617 */
618 static boolean
619 is_empty_bin( const struct cmd_bin *bin )
620 {
621 const struct cmd_block *head = bin->commands.head;
622 int i;
623
624 /* We emit at most two load-tile commands at the start of the first
625 * command block. If there are more than two commands in the
626 * block, we know that the bin is non-empty.
627 */
628 if (head->next != NULL ||
629 head->count > 2)
630 return FALSE;
631
632 for (i = 0; i < head->count; i++)
633 if (head->cmd[i] != lp_rast_load_color &&
634 head->cmd[i] != lp_rast_load_zstencil)
635 return FALSE;
636
637 return TRUE;
638 }
639
640
641
642 /**
643 * Rasterize/execute all bins within a scene.
644 * Called per thread.
645 */
646 static void
647 rasterize_scene( struct lp_rasterizer *rast,
648 unsigned thread_index,
649 struct lp_scene *scene,
650 bool write_depth )
651 {
652 /* loop over scene bins, rasterize each */
653 #if 0
654 {
655 unsigned i, j;
656 for (i = 0; i < scene->tiles_x; i++) {
657 for (j = 0; j < scene->tiles_y; j++) {
658 struct cmd_bin *bin = lp_get_bin(scene, i, j);
659 rasterize_bin( rast, thread_index,
660 bin, i * TILE_SIZE, j * TILE_SIZE );
661 }
662 }
663 }
664 #else
665 {
666 struct cmd_bin *bin;
667 int x, y;
668
669 assert(scene);
670 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
671 if (!is_empty_bin( bin ))
672 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
673 }
674 }
675 #endif
676 }
677
678
679 /**
680 * Called by setup module when it has something for us to render.
681 */
682 void
683 lp_rasterize_scene( struct lp_rasterizer *rast,
684 struct lp_scene *scene,
685 const struct pipe_framebuffer_state *fb,
686 bool write_depth )
687 {
688 boolean debug = false;
689
690 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
691
692 if (debug) {
693 unsigned x, y;
694 printf("rasterize scene:\n");
695 printf(" data size: %u\n", lp_scene_data_size(scene));
696 for (y = 0; y < scene->tiles_y; y++) {
697 for (x = 0; x < scene->tiles_x; x++) {
698 printf(" bin %u, %u size: %u\n", x, y,
699 lp_scene_bin_size(scene, x, y));
700 }
701 }
702 }
703
704 /* save framebuffer state in the bin */
705 util_copy_framebuffer_state(&scene->fb, fb);
706 scene->write_depth = write_depth;
707
708 if (rast->num_threads == 0) {
709 /* no threading */
710
711 lp_rast_begin( rast, fb,
712 fb->nr_cbufs != 0, /* always write color if cbufs present */
713 fb->zsbuf != NULL && write_depth );
714
715 lp_scene_bin_iter_begin( scene );
716 rasterize_scene( rast, 0, scene, write_depth );
717
718 release_scene( rast, scene );
719
720 lp_rast_end( rast );
721 }
722 else {
723 /* threaded rendering! */
724 unsigned i;
725
726 lp_scene_enqueue( rast->full_scenes, scene );
727
728 /* signal the threads that there's work to do */
729 for (i = 0; i < rast->num_threads; i++) {
730 pipe_semaphore_signal(&rast->tasks[i].work_ready);
731 }
732
733 /* wait for work to complete */
734 for (i = 0; i < rast->num_threads; i++) {
735 pipe_semaphore_wait(&rast->tasks[i].work_done);
736 }
737 }
738
739 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
740 }
741
742
743 /**
744 * This is the thread's main entrypoint.
745 * It's a simple loop:
746 * 1. wait for work
747 * 2. do work
748 * 3. signal that we're done
749 */
750 static void *
751 thread_func( void *init_data )
752 {
753 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
754 struct lp_rasterizer *rast = task->rast;
755 boolean debug = false;
756
757 while (1) {
758 /* wait for work */
759 if (debug)
760 debug_printf("thread %d waiting for work\n", task->thread_index);
761 pipe_semaphore_wait(&task->work_ready);
762
763 if (task->thread_index == 0) {
764 /* thread[0]:
765 * - get next scene to rasterize
766 * - map the framebuffer surfaces
767 */
768 const struct pipe_framebuffer_state *fb;
769 boolean write_depth;
770
771 rast->curr_scene = lp_scene_dequeue( rast->full_scenes );
772
773 lp_scene_bin_iter_begin( rast->curr_scene );
774
775 fb = &rast->curr_scene->fb;
776 write_depth = rast->curr_scene->write_depth;
777
778 lp_rast_begin( rast, fb,
779 fb->nr_cbufs != 0,
780 fb->zsbuf != NULL && write_depth );
781 }
782
783 /* Wait for all threads to get here so that threads[1+] don't
784 * get a null rast->curr_scene pointer.
785 */
786 pipe_barrier_wait( &rast->barrier );
787
788 /* do work */
789 if (debug)
790 debug_printf("thread %d doing work\n", task->thread_index);
791 rasterize_scene(rast,
792 task->thread_index,
793 rast->curr_scene,
794 rast->curr_scene->write_depth);
795
796 /* wait for all threads to finish with this scene */
797 pipe_barrier_wait( &rast->barrier );
798
799 if (task->thread_index == 0) {
800 /* thread[0]:
801 * - release the scene object
802 * - unmap the framebuffer surfaces
803 */
804 release_scene( rast, rast->curr_scene );
805 lp_rast_end( rast );
806 }
807
808 /* signal done with work */
809 if (debug)
810 debug_printf("thread %d done working\n", task->thread_index);
811 pipe_semaphore_signal(&task->work_done);
812 }
813
814 return NULL;
815 }
816
817
818 /**
819 * Initialize semaphores and spawn the threads.
820 */
821 static void
822 create_rast_threads(struct lp_rasterizer *rast)
823 {
824 unsigned i;
825
826 rast->num_threads = util_cpu_caps.nr_cpus;
827 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
828 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
829
830 /* NOTE: if num_threads is zero, we won't use any threads */
831 for (i = 0; i < rast->num_threads; i++) {
832 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
833 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
834 rast->threads[i] = pipe_thread_create(thread_func,
835 (void *) &rast->tasks[i]);
836 }
837 }
838
839
840
841 /**
842 * Create new lp_rasterizer.
843 * \param empty the queue to put empty scenes on after we've finished
844 * processing them.
845 */
846 struct lp_rasterizer *
847 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
848 {
849 struct lp_rasterizer *rast;
850 unsigned i, cbuf;
851
852 rast = CALLOC_STRUCT(lp_rasterizer);
853 if(!rast)
854 return NULL;
855
856 rast->screen = screen;
857
858 rast->empty_scenes = empty;
859 rast->full_scenes = lp_scene_queue_create();
860
861 for (i = 0; i < Elements(rast->tasks); i++) {
862 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
863 rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
864
865 rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
866 rast->tasks[i].rast = rast;
867 rast->tasks[i].thread_index = i;
868 }
869
870 create_rast_threads(rast);
871
872 /* for synchronizing rasterization threads */
873 pipe_barrier_init( &rast->barrier, rast->num_threads );
874
875 return rast;
876 }
877
878
879 /* Shutdown:
880 */
881 void lp_rast_destroy( struct lp_rasterizer *rast )
882 {
883 unsigned i, cbuf;
884
885 util_unreference_framebuffer_state(&rast->state.fb);
886
887 for (i = 0; i < Elements(rast->tasks); i++) {
888 align_free(rast->tasks[i].tile.depth);
889 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
890 align_free(rast->tasks[i].tile.color[cbuf]);
891 }
892
893 /* for synchronizing rasterization threads */
894 pipe_barrier_destroy( &rast->barrier );
895
896 FREE(rast);
897 }
898
899
900 /** Return number of rasterization threads */
901 unsigned
902 lp_rast_get_num_threads( struct lp_rasterizer *rast )
903 {
904 return rast->num_threads;
905 }