82c006d78b0b35331b5e4b28b029198e69185b80
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_perf.h"
38 #include "lp_rast.h"
39 #include "lp_rast_priv.h"
40 #include "lp_tile_soa.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_scene.h"
43
44
45 /**
46 * Begin the rasterization phase.
47 * Map the framebuffer surfaces. Initialize the 'rast' state.
48 */
49 static boolean
50 lp_rast_begin( struct lp_rasterizer *rast,
51 const struct pipe_framebuffer_state *fb,
52 boolean write_color,
53 boolean write_zstencil )
54 {
55 struct pipe_screen *screen = rast->screen;
56 struct pipe_surface *cbuf, *zsbuf;
57 int i;
58
59 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
60
61 util_copy_framebuffer_state(&rast->state.fb, fb);
62
63 rast->state.write_zstencil = write_zstencil;
64 rast->state.write_color = write_color;
65
66 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
67 fb->height % TILE_SIZE != 0);
68
69
70 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
71 cbuf = rast->state.fb.cbufs[i];
72 if (cbuf) {
73 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
74 cbuf->texture,
75 cbuf->face,
76 cbuf->level,
77 cbuf->zslice,
78 PIPE_TRANSFER_READ_WRITE,
79 0, 0,
80 cbuf->width,
81 cbuf->height);
82 if (!rast->cbuf_transfer[i])
83 goto fail;
84
85 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
86 rast->cbuf_transfer[i]);
87 if (!rast->cbuf_map[i])
88 goto fail;
89 }
90 }
91
92 zsbuf = rast->state.fb.zsbuf;
93 if (zsbuf) {
94 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
95 zsbuf->texture,
96 zsbuf->face,
97 zsbuf->level,
98 zsbuf->zslice,
99 PIPE_TRANSFER_READ_WRITE,
100 0, 0,
101 zsbuf->width,
102 zsbuf->height);
103 if (!rast->zsbuf_transfer)
104 goto fail;
105
106 rast->zsbuf_map = screen->transfer_map(rast->screen,
107 rast->zsbuf_transfer);
108 if (!rast->zsbuf_map)
109 goto fail;
110 }
111
112 return TRUE;
113
114 fail:
115 /* Unmap and release transfers?
116 */
117 return FALSE;
118 }
119
120
121 /**
122 * Finish the rasterization phase.
123 * Unmap framebuffer surfaces.
124 */
125 static void
126 lp_rast_end( struct lp_rasterizer *rast )
127 {
128 struct pipe_screen *screen = rast->screen;
129 unsigned i;
130
131 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
132 if (rast->cbuf_map[i])
133 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
134
135 if (rast->cbuf_transfer[i])
136 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
137
138 rast->cbuf_transfer[i] = NULL;
139 rast->cbuf_map[i] = NULL;
140 }
141
142 if (rast->zsbuf_map)
143 screen->transfer_unmap(screen, rast->zsbuf_transfer);
144
145 if (rast->zsbuf_transfer)
146 screen->tex_transfer_destroy(rast->zsbuf_transfer);
147
148 rast->zsbuf_transfer = NULL;
149 rast->zsbuf_map = NULL;
150 }
151
152
153 /**
154 * Begining rasterization of a tile.
155 * \param x window X position of the tile, in pixels
156 * \param y window Y position of the tile, in pixels
157 */
158 static void
159 lp_rast_start_tile(struct lp_rasterizer_task *task,
160 unsigned x, unsigned y)
161 {
162 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
163
164 task->x = x;
165 task->y = y;
166 }
167
168
169 /**
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
172 */
173 void
174 lp_rast_clear_color(struct lp_rasterizer_task *task,
175 const union lp_rast_cmd_arg arg)
176 {
177 struct lp_rasterizer *rast = task->rast;
178 const uint8_t *clear_color = arg.clear_color;
179 uint8_t **color_tile = task->tile.color;
180 unsigned i;
181
182 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
183 clear_color[0],
184 clear_color[1],
185 clear_color[2],
186 clear_color[3]);
187
188 if (clear_color[0] == clear_color[1] &&
189 clear_color[1] == clear_color[2] &&
190 clear_color[2] == clear_color[3]) {
191 /* clear to grayscale value {x, x, x, x} */
192 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
193 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
194 }
195 }
196 else {
197 /* Non-gray color.
198 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
199 * will need to change. It'll be pretty obvious when clearing no longer
200 * works.
201 */
202 const unsigned chunk = TILE_SIZE / 4;
203 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
204 uint8_t *c = color_tile[i];
205 unsigned j;
206 for (j = 0; j < 4 * TILE_SIZE; j++) {
207 memset(c, clear_color[0], chunk);
208 c += chunk;
209 memset(c, clear_color[1], chunk);
210 c += chunk;
211 memset(c, clear_color[2], chunk);
212 c += chunk;
213 memset(c, clear_color[3], chunk);
214 c += chunk;
215 }
216 assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4);
217 }
218 }
219
220 LP_COUNT(nr_color_tile_clear);
221 }
222
223
224 /**
225 * Clear the rasterizer's current z/stencil tile.
226 * This is a bin command called during bin processing.
227 */
228 void
229 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer *rast = task->rast;
233 const unsigned tile_x = task->x;
234 const unsigned tile_y = task->y;
235 const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT;
236 const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT;
237 unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format);
238 uint8_t *dst;
239 unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT;
240 unsigned i, j;
241
242 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
243
244 assert(rast->zsbuf_map);
245 if (!rast->zsbuf_map)
246 return;
247
248 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
249
250 /*
251 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
252 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
253 *
254 * The swizzled depth format is such that the depths for
255 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
256 */
257
258 dst = lp_rast_depth_pointer(rast, tile_x, tile_y);
259
260 switch (block_size) {
261 case 1:
262 memset(dst, (uint8_t) arg.clear_zstencil, height * width);
263 break;
264 case 2:
265 for (i = 0; i < height; i++) {
266 uint16_t *row = (uint16_t *)dst;
267 for (j = 0; j < width; j++)
268 *row++ = (uint16_t) arg.clear_zstencil;
269 dst += dst_stride;
270 }
271 break;
272 case 4:
273 for (i = 0; i < height; i++) {
274 uint32_t *row = (uint32_t *)dst;
275 for (j = 0; j < width; j++)
276 *row++ = arg.clear_zstencil;
277 dst += dst_stride;
278 }
279 break;
280 default:
281 assert(0);
282 break;
283 }
284 }
285
286
287 /**
288 * Load tile color from the framebuffer surface.
289 * This is a bin command called during bin processing.
290 */
291 void
292 lp_rast_load_color(struct lp_rasterizer_task *task,
293 const union lp_rast_cmd_arg arg)
294 {
295 struct lp_rasterizer *rast = task->rast;
296 const unsigned x = task->x, y = task->y;
297 unsigned i;
298
299 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
300
301 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
302 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
303 int w = TILE_SIZE;
304 int h = TILE_SIZE;
305
306 if (x >= transfer->width || y >= transfer->height)
307 continue;
308
309 assert(w >= 0);
310 assert(h >= 0);
311 assert(w <= TILE_SIZE);
312 assert(h <= TILE_SIZE);
313
314 lp_tile_read_4ub(transfer->texture->format,
315 task->tile.color[i],
316 rast->cbuf_map[i],
317 transfer->stride,
318 x, y,
319 w, h);
320
321 LP_COUNT(nr_color_tile_load);
322 }
323 }
324
325
326 void
327 lp_rast_set_state(struct lp_rasterizer_task *task,
328 const union lp_rast_cmd_arg arg)
329 {
330 const struct lp_rast_state *state = arg.set_state;
331
332 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
333
334 /* just set the current state pointer for this rasterizer */
335 task->current_state = state;
336 }
337
338
339
340 /**
341 * Run the shader on all blocks in a tile. This is used when a tile is
342 * completely contained inside a triangle.
343 * This is a bin command called during bin processing.
344 */
345 void
346 lp_rast_shade_tile(struct lp_rasterizer_task *task,
347 const union lp_rast_cmd_arg arg)
348 {
349 struct lp_rasterizer *rast = task->rast;
350 const struct lp_rast_state *state = task->current_state;
351 struct lp_rast_tile *tile = &task->tile;
352 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
353 const unsigned tile_x = task->x, tile_y = task->y;
354 unsigned x, y;
355
356 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
357
358 /* render the whole 64x64 tile in 4x4 chunks */
359 for (y = 0; y < TILE_SIZE; y += 4){
360 for (x = 0; x < TILE_SIZE; x += 4) {
361 uint8_t *color[PIPE_MAX_COLOR_BUFS];
362 uint32_t *depth;
363 unsigned block_offset, i;
364
365 /* offset of the 16x16 pixel block within the tile */
366 block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
367
368 /* color buffer */
369 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
370 color[i] = tile->color[i] + 4 * block_offset;
371
372 /* depth buffer */
373 depth = lp_rast_depth_pointer(rast, tile_x + x, tile_y + y);
374
375 /* run shader */
376 state->jit_function[0]( &state->jit_context,
377 tile_x + x, tile_y + y,
378 inputs->a0,
379 inputs->dadx,
380 inputs->dady,
381 color,
382 depth,
383 INT_MIN, INT_MIN, INT_MIN,
384 NULL, NULL, NULL );
385 }
386 }
387 }
388
389
390 /**
391 * Compute shading for a 4x4 block of pixels.
392 * This is a bin command called during bin processing.
393 */
394 void lp_rast_shade_quads( struct lp_rasterizer_task *task,
395 const struct lp_rast_shader_inputs *inputs,
396 unsigned x, unsigned y,
397 int32_t c1, int32_t c2, int32_t c3)
398 {
399 const struct lp_rast_state *state = task->current_state;
400 struct lp_rasterizer *rast = task->rast;
401 struct lp_rast_tile *tile = &task->tile;
402 uint8_t *color[PIPE_MAX_COLOR_BUFS];
403 void *depth;
404 unsigned i;
405 unsigned ix, iy;
406 int block_offset;
407
408 #ifdef DEBUG
409 assert(state);
410
411 /* Sanity checks */
412 assert(x % TILE_VECTOR_WIDTH == 0);
413 assert(y % TILE_VECTOR_HEIGHT == 0);
414
415 assert((x % 4) == 0);
416 assert((y % 4) == 0);
417 #endif
418
419 ix = x % TILE_SIZE;
420 iy = y % TILE_SIZE;
421
422 /* offset of the 16x16 pixel block within the tile */
423 block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
424
425 /* color buffer */
426 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
427 color[i] = tile->color[i] + 4 * block_offset;
428
429 /* depth buffer */
430 depth = lp_rast_depth_pointer(rast, x, y);
431
432
433
434 #ifdef DEBUG
435 assert(lp_check_alignment(tile->color[0], 16));
436 assert(lp_check_alignment(state->jit_context.blend_color, 16));
437
438 assert(lp_check_alignment(inputs->step[0], 16));
439 assert(lp_check_alignment(inputs->step[1], 16));
440 assert(lp_check_alignment(inputs->step[2], 16));
441 #endif
442
443 /* run shader */
444 state->jit_function[1]( &state->jit_context,
445 x, y,
446 inputs->a0,
447 inputs->dadx,
448 inputs->dady,
449 color,
450 depth,
451 c1, c2, c3,
452 inputs->step[0], inputs->step[1], inputs->step[2]);
453 }
454
455
456 /**
457 * Set top row and left column of the tile's pixels to white. For debugging.
458 */
459 static void
460 outline_tile(uint8_t *tile)
461 {
462 const uint8_t val = 0xff;
463 unsigned i;
464
465 for (i = 0; i < TILE_SIZE; i++) {
466 TILE_PIXEL(tile, i, 0, 0) = val;
467 TILE_PIXEL(tile, i, 0, 1) = val;
468 TILE_PIXEL(tile, i, 0, 2) = val;
469 TILE_PIXEL(tile, i, 0, 3) = val;
470
471 TILE_PIXEL(tile, 0, i, 0) = val;
472 TILE_PIXEL(tile, 0, i, 1) = val;
473 TILE_PIXEL(tile, 0, i, 2) = val;
474 TILE_PIXEL(tile, 0, i, 3) = val;
475 }
476 }
477
478
479 /**
480 * Draw grid of gray lines at 16-pixel intervals across the tile to
481 * show the sub-tile boundaries. For debugging.
482 */
483 static void
484 outline_subtiles(uint8_t *tile)
485 {
486 const uint8_t val = 0x80;
487 const unsigned step = 16;
488 unsigned i, j;
489
490 for (i = 0; i < TILE_SIZE; i += step) {
491 for (j = 0; j < TILE_SIZE; j++) {
492 TILE_PIXEL(tile, i, j, 0) = val;
493 TILE_PIXEL(tile, i, j, 1) = val;
494 TILE_PIXEL(tile, i, j, 2) = val;
495 TILE_PIXEL(tile, i, j, 3) = val;
496
497 TILE_PIXEL(tile, j, i, 0) = val;
498 TILE_PIXEL(tile, j, i, 1) = val;
499 TILE_PIXEL(tile, j, i, 2) = val;
500 TILE_PIXEL(tile, j, i, 3) = val;
501 }
502 }
503
504 outline_tile(tile);
505 }
506
507
508
509 /**
510 * Write the rasterizer's color tile to the framebuffer.
511 */
512 static void
513 lp_rast_store_color(struct lp_rasterizer_task *task)
514 {
515 struct lp_rasterizer *rast = task->rast;
516 const unsigned x = task->x, y = task->y;
517 unsigned i;
518
519 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
520 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
521 int w = TILE_SIZE;
522 int h = TILE_SIZE;
523
524 if (x >= transfer->width)
525 continue;
526
527 if (y >= transfer->height)
528 continue;
529
530 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
531 task->thread_index, x, y, w, h);
532
533 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
534 outline_subtiles(task->tile.color[i]);
535 else if (LP_DEBUG & DEBUG_SHOW_TILES)
536 outline_tile(task->tile.color[i]);
537
538 lp_tile_write_4ub(transfer->texture->format,
539 task->tile.color[i],
540 rast->cbuf_map[i],
541 transfer->stride,
542 x, y,
543 w, h);
544
545 LP_COUNT(nr_color_tile_store);
546 }
547 }
548
549
550 /**
551 * Write the rasterizer's tiles to the framebuffer.
552 */
553 static void
554 lp_rast_end_tile(struct lp_rasterizer_task *task)
555 {
556 struct lp_rasterizer *rast = task->rast;
557
558 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
559
560 if (rast->state.write_color)
561 lp_rast_store_color(task);
562 }
563
564
565 /**
566 * Signal on a fence. This is called during bin execution/rasterization.
567 * Called per thread.
568 */
569 void
570 lp_rast_fence(struct lp_rasterizer_task *task,
571 const union lp_rast_cmd_arg arg)
572 {
573 struct lp_fence *fence = arg.fence;
574
575 pipe_mutex_lock( fence->mutex );
576
577 fence->count++;
578 assert(fence->count <= fence->rank);
579
580 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
581 fence->count, fence->rank);
582
583 pipe_condvar_signal( fence->signalled );
584
585 pipe_mutex_unlock( fence->mutex );
586 }
587
588
589 /**
590 * When all the threads are done rasterizing a scene, one thread will
591 * call this function to reset the scene and put it onto the empty queue.
592 */
593 static void
594 release_scene( struct lp_rasterizer *rast,
595 struct lp_scene *scene )
596 {
597 util_unreference_framebuffer_state( &scene->fb );
598
599 lp_scene_reset( scene );
600
601 assert(lp_scene_is_empty(scene));
602
603 lp_scene_enqueue( rast->empty_scenes, scene );
604 rast->curr_scene = NULL;
605 }
606
607
608 /**
609 * Rasterize commands for a single bin.
610 * \param x, y position of the bin's tile in the framebuffer
611 * Must be called between lp_rast_begin() and lp_rast_end().
612 * Called per thread.
613 */
614 static void
615 rasterize_bin(struct lp_rasterizer_task *task,
616 const struct cmd_bin *bin,
617 int x, int y)
618 {
619 const struct cmd_block_list *commands = &bin->commands;
620 struct cmd_block *block;
621 unsigned k;
622
623 lp_rast_start_tile( task, x, y );
624
625 /* simply execute each of the commands in the block list */
626 for (block = commands->head; block; block = block->next) {
627 for (k = 0; k < block->count; k++) {
628 block->cmd[k]( task, block->arg[k] );
629 }
630 }
631
632 lp_rast_end_tile( task );
633 }
634
635
636 #define RAST(x) { lp_rast_##x, #x }
637
638 static struct {
639 lp_rast_cmd cmd;
640 const char *name;
641 } cmd_names[] =
642 {
643 RAST(load_color),
644 RAST(clear_color),
645 RAST(clear_zstencil),
646 RAST(triangle),
647 RAST(shade_tile),
648 RAST(set_state),
649 RAST(fence),
650 };
651
652 static void
653 debug_bin( const struct cmd_bin *bin )
654 {
655 const struct cmd_block *head = bin->commands.head;
656 int i, j;
657
658 for (i = 0; i < head->count; i++) {
659 debug_printf("%d: ", i);
660 for (j = 0; j < Elements(cmd_names); j++) {
661 if (head->cmd[i] == cmd_names[j].cmd) {
662 debug_printf("%s\n", cmd_names[j].name);
663 break;
664 }
665 }
666 if (j == Elements(cmd_names))
667 debug_printf("...other\n");
668 }
669
670 }
671
672 /* An empty bin is one that just loads the contents of the tile and
673 * stores them again unchanged. This typically happens when bins have
674 * been flushed for some reason in the middle of a frame, or when
675 * incremental updates are being made to a render target.
676 *
677 * Try to avoid doing pointless work in this case.
678 */
679 static boolean
680 is_empty_bin( const struct cmd_bin *bin )
681 {
682 const struct cmd_block *head = bin->commands.head;
683 int i;
684
685 if (0)
686 debug_bin(bin);
687
688 /* We emit at most two load-tile commands at the start of the first
689 * command block. In addition we seem to emit a couple of
690 * set-state commands even in empty bins.
691 *
692 * As a heuristic, if a bin has more than 4 commands, consider it
693 * non-empty.
694 */
695 if (head->next != NULL ||
696 head->count > 4) {
697 return FALSE;
698 }
699
700 for (i = 0; i < head->count; i++)
701 if (head->cmd[i] != lp_rast_load_color &&
702 head->cmd[i] != lp_rast_set_state) {
703 return FALSE;
704 }
705
706 return TRUE;
707 }
708
709
710
711 /**
712 * Rasterize/execute all bins within a scene.
713 * Called per thread.
714 */
715 static void
716 rasterize_scene(struct lp_rasterizer_task *task,
717 struct lp_scene *scene,
718 bool write_depth)
719 {
720 /* loop over scene bins, rasterize each */
721 #if 0
722 {
723 unsigned i, j;
724 for (i = 0; i < scene->tiles_x; i++) {
725 for (j = 0; j < scene->tiles_y; j++) {
726 struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
727 rasterize_bin(task, bin, i * TILE_SIZE, j * TILE_SIZE);
728 }
729 }
730 }
731 #else
732 {
733 struct cmd_bin *bin;
734 int x, y;
735
736 assert(scene);
737 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
738 if (!is_empty_bin( bin ))
739 rasterize_bin(task, bin, x * TILE_SIZE, y * TILE_SIZE);
740 }
741 }
742 #endif
743 }
744
745
746 /**
747 * Called by setup module when it has something for us to render.
748 */
749 void
750 lp_rasterize_scene( struct lp_rasterizer *rast,
751 struct lp_scene *scene,
752 const struct pipe_framebuffer_state *fb,
753 bool write_depth )
754 {
755 boolean debug = false;
756
757 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
758
759 if (debug) {
760 unsigned x, y;
761 debug_printf("rasterize scene:\n");
762 debug_printf(" data size: %u\n", lp_scene_data_size(scene));
763 for (y = 0; y < scene->tiles_y; y++) {
764 for (x = 0; x < scene->tiles_x; x++) {
765 debug_printf(" bin %u, %u size: %u\n", x, y,
766 lp_scene_bin_size(scene, x, y));
767 }
768 }
769 }
770
771 /* save framebuffer state in the bin */
772 util_copy_framebuffer_state(&scene->fb, fb);
773 scene->write_depth = write_depth;
774
775 if (rast->num_threads == 0) {
776 /* no threading */
777
778 lp_rast_begin( rast, fb,
779 fb->nr_cbufs != 0, /* always write color if cbufs present */
780 fb->zsbuf != NULL && write_depth );
781
782 lp_scene_bin_iter_begin( scene );
783 rasterize_scene( &rast->tasks[0], scene, write_depth );
784
785 release_scene( rast, scene );
786
787 lp_rast_end( rast );
788 }
789 else {
790 /* threaded rendering! */
791 unsigned i;
792
793 lp_scene_enqueue( rast->full_scenes, scene );
794
795 /* signal the threads that there's work to do */
796 for (i = 0; i < rast->num_threads; i++) {
797 pipe_semaphore_signal(&rast->tasks[i].work_ready);
798 }
799
800 /* wait for work to complete */
801 for (i = 0; i < rast->num_threads; i++) {
802 pipe_semaphore_wait(&rast->tasks[i].work_done);
803 }
804 }
805
806 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
807 }
808
809
810 /**
811 * This is the thread's main entrypoint.
812 * It's a simple loop:
813 * 1. wait for work
814 * 2. do work
815 * 3. signal that we're done
816 */
817 static PIPE_THREAD_ROUTINE( thread_func, init_data )
818 {
819 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
820 struct lp_rasterizer *rast = task->rast;
821 boolean debug = false;
822
823 while (1) {
824 /* wait for work */
825 if (debug)
826 debug_printf("thread %d waiting for work\n", task->thread_index);
827 pipe_semaphore_wait(&task->work_ready);
828
829 if (rast->exit_flag)
830 break;
831
832 if (task->thread_index == 0) {
833 /* thread[0]:
834 * - get next scene to rasterize
835 * - map the framebuffer surfaces
836 */
837 const struct pipe_framebuffer_state *fb;
838 boolean write_depth;
839
840 rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE );
841
842 lp_scene_bin_iter_begin( rast->curr_scene );
843
844 fb = &rast->curr_scene->fb;
845 write_depth = rast->curr_scene->write_depth;
846
847 lp_rast_begin( rast, fb,
848 fb->nr_cbufs != 0,
849 fb->zsbuf != NULL && write_depth );
850 }
851
852 /* Wait for all threads to get here so that threads[1+] don't
853 * get a null rast->curr_scene pointer.
854 */
855 pipe_barrier_wait( &rast->barrier );
856
857 /* do work */
858 if (debug)
859 debug_printf("thread %d doing work\n", task->thread_index);
860 rasterize_scene(task,
861 rast->curr_scene,
862 rast->curr_scene->write_depth);
863
864 /* wait for all threads to finish with this scene */
865 pipe_barrier_wait( &rast->barrier );
866
867 if (task->thread_index == 0) {
868 /* thread[0]:
869 * - release the scene object
870 * - unmap the framebuffer surfaces
871 */
872 release_scene( rast, rast->curr_scene );
873 lp_rast_end( rast );
874 }
875
876 /* signal done with work */
877 if (debug)
878 debug_printf("thread %d done working\n", task->thread_index);
879 pipe_semaphore_signal(&task->work_done);
880 }
881
882 return NULL;
883 }
884
885
886 /**
887 * Initialize semaphores and spawn the threads.
888 */
889 static void
890 create_rast_threads(struct lp_rasterizer *rast)
891 {
892 unsigned i;
893
894 #ifdef PIPE_OS_WINDOWS
895 /* Multithreading not supported on windows until conditions and barriers are
896 * properly implemented. */
897 rast->num_threads = 0;
898 #else
899 rast->num_threads = util_cpu_caps.nr_cpus;
900 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
901 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
902 #endif
903
904 /* NOTE: if num_threads is zero, we won't use any threads */
905 for (i = 0; i < rast->num_threads; i++) {
906 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
907 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
908 rast->threads[i] = pipe_thread_create(thread_func,
909 (void *) &rast->tasks[i]);
910 }
911 }
912
913
914
915 /**
916 * Create new lp_rasterizer.
917 * \param empty the queue to put empty scenes on after we've finished
918 * processing them.
919 */
920 struct lp_rasterizer *
921 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
922 {
923 struct lp_rasterizer *rast;
924 unsigned i, cbuf;
925
926 rast = CALLOC_STRUCT(lp_rasterizer);
927 if(!rast)
928 return NULL;
929
930 rast->screen = screen;
931
932 rast->empty_scenes = empty;
933 rast->full_scenes = lp_scene_queue_create();
934
935 for (i = 0; i < Elements(rast->tasks); i++) {
936 struct lp_rasterizer_task *task = &rast->tasks[i];
937
938 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
939 task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16);
940
941 task->rast = rast;
942 task->thread_index = i;
943 }
944
945 create_rast_threads(rast);
946
947 /* for synchronizing rasterization threads */
948 pipe_barrier_init( &rast->barrier, rast->num_threads );
949
950 return rast;
951 }
952
953
954 /* Shutdown:
955 */
956 void lp_rast_destroy( struct lp_rasterizer *rast )
957 {
958 unsigned i, cbuf;
959
960 util_unreference_framebuffer_state(&rast->state.fb);
961
962 for (i = 0; i < Elements(rast->tasks); i++) {
963 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
964 align_free(rast->tasks[i].tile.color[cbuf]);
965 }
966
967 /* Set exit_flag and signal each thread's work_ready semaphore.
968 * Each thread will be woken up, notice that the exit_flag is set and
969 * break out of its main loop. The thread will then exit.
970 */
971 rast->exit_flag = TRUE;
972 for (i = 0; i < rast->num_threads; i++) {
973 pipe_semaphore_signal(&rast->tasks[i].work_ready);
974 }
975
976 for (i = 0; i < rast->num_threads; i++) {
977 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
978 pipe_semaphore_destroy(&rast->tasks[i].work_done);
979 }
980
981 /* for synchronizing rasterization threads */
982 pipe_barrier_destroy( &rast->barrier );
983
984 FREE(rast);
985 }
986
987
988 /** Return number of rasterization threads */
989 unsigned
990 lp_rast_get_num_threads( struct lp_rasterizer *rast )
991 {
992 return rast->num_threads;
993 }