Merge branch 'gallium-dynamicstencilref'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_perf.h"
38 #include "lp_rast.h"
39 #include "lp_rast_priv.h"
40 #include "lp_tile_soa.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_scene.h"
43
44
45 /**
46 * Begin the rasterization phase.
47 * Map the framebuffer surfaces. Initialize the 'rast' state.
48 */
49 static boolean
50 lp_rast_begin( struct lp_rasterizer *rast,
51 const struct pipe_framebuffer_state *fb,
52 boolean write_color,
53 boolean write_zstencil )
54 {
55 struct pipe_screen *screen = rast->screen;
56 struct pipe_surface *cbuf, *zsbuf;
57 int i;
58
59 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
60
61 util_copy_framebuffer_state(&rast->state.fb, fb);
62
63 rast->state.write_zstencil = write_zstencil;
64 rast->state.write_color = write_color;
65
66 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
67 fb->height % TILE_SIZE != 0);
68
69
70 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
71 cbuf = rast->state.fb.cbufs[i];
72 if (cbuf) {
73 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
74 cbuf->texture,
75 cbuf->face,
76 cbuf->level,
77 cbuf->zslice,
78 PIPE_TRANSFER_READ_WRITE,
79 0, 0,
80 cbuf->width,
81 cbuf->height);
82 if (!rast->cbuf_transfer[i])
83 goto fail;
84
85 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
86 rast->cbuf_transfer[i]);
87 if (!rast->cbuf_map[i])
88 goto fail;
89 }
90 }
91
92 zsbuf = rast->state.fb.zsbuf;
93 if (zsbuf) {
94 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
95 zsbuf->texture,
96 zsbuf->face,
97 zsbuf->level,
98 zsbuf->zslice,
99 PIPE_TRANSFER_READ_WRITE,
100 0, 0,
101 zsbuf->width,
102 zsbuf->height);
103 if (!rast->zsbuf_transfer)
104 goto fail;
105
106 rast->zsbuf_map = screen->transfer_map(rast->screen,
107 rast->zsbuf_transfer);
108 if (!rast->zsbuf_map)
109 goto fail;
110 }
111
112 return TRUE;
113
114 fail:
115 /* Unmap and release transfers?
116 */
117 return FALSE;
118 }
119
120
121 /**
122 * Finish the rasterization phase.
123 * Unmap framebuffer surfaces.
124 */
125 static void
126 lp_rast_end( struct lp_rasterizer *rast )
127 {
128 struct pipe_screen *screen = rast->screen;
129 unsigned i;
130
131 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
132 if (rast->cbuf_map[i])
133 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
134
135 if (rast->cbuf_transfer[i])
136 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
137
138 rast->cbuf_transfer[i] = NULL;
139 rast->cbuf_map[i] = NULL;
140 }
141
142 if (rast->zsbuf_map)
143 screen->transfer_unmap(screen, rast->zsbuf_transfer);
144
145 if (rast->zsbuf_transfer)
146 screen->tex_transfer_destroy(rast->zsbuf_transfer);
147
148 rast->zsbuf_transfer = NULL;
149 rast->zsbuf_map = NULL;
150 }
151
152
153 /**
154 * Begining rasterization of a tile.
155 * \param x window X position of the tile, in pixels
156 * \param y window Y position of the tile, in pixels
157 */
158 static void
159 lp_rast_start_tile( struct lp_rasterizer *rast,
160 unsigned thread_index,
161 unsigned x, unsigned y )
162 {
163 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
164
165 rast->tasks[thread_index].x = x;
166 rast->tasks[thread_index].y = y;
167 }
168
169
170 /**
171 * Clear the rasterizer's current color tile.
172 * This is a bin command called during bin processing.
173 */
174 void lp_rast_clear_color( struct lp_rasterizer *rast,
175 unsigned thread_index,
176 const union lp_rast_cmd_arg arg )
177 {
178 const uint8_t *clear_color = arg.clear_color;
179 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
180 unsigned i;
181
182 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
183 clear_color[0],
184 clear_color[1],
185 clear_color[2],
186 clear_color[3]);
187
188 if (clear_color[0] == clear_color[1] &&
189 clear_color[1] == clear_color[2] &&
190 clear_color[2] == clear_color[3]) {
191 /* clear to grayscale value {x, x, x, x} */
192 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
193 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
194 }
195 }
196 else {
197 /* Non-gray color.
198 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
199 * will need to change. It'll be pretty obvious when clearing no longer
200 * works.
201 */
202 const unsigned chunk = TILE_SIZE / 4;
203 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
204 uint8_t *c = color_tile[i];
205 unsigned j;
206 for (j = 0; j < 4 * TILE_SIZE; j++) {
207 memset(c, clear_color[0], chunk);
208 c += chunk;
209 memset(c, clear_color[1], chunk);
210 c += chunk;
211 memset(c, clear_color[2], chunk);
212 c += chunk;
213 memset(c, clear_color[3], chunk);
214 c += chunk;
215 }
216 assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4);
217 }
218 }
219
220 LP_COUNT(nr_color_tile_clear);
221 }
222
223
224 /**
225 * Clear the rasterizer's current z/stencil tile.
226 * This is a bin command called during bin processing.
227 */
228 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
229 unsigned thread_index,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
233 const unsigned tile_x = task->x;
234 const unsigned tile_y = task->y;
235 const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT;
236 const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT;
237 unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format);
238 uint8_t *dst;
239 unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT;
240 unsigned i, j;
241
242 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
243
244 assert(rast->zsbuf_map);
245 if (!rast->zsbuf_map)
246 return;
247
248 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
249
250 /*
251 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
252 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
253 *
254 * The swizzled depth format is such that the depths for
255 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
256 */
257
258 dst = lp_rast_depth_pointer(rast, tile_x, tile_y);
259
260 switch (block_size) {
261 case 1:
262 memset(dst, (uint8_t) arg.clear_zstencil, height * width);
263 break;
264 case 2:
265 for (i = 0; i < height; i++) {
266 uint16_t *row = (uint16_t *)dst;
267 for (j = 0; j < width; j++)
268 *row++ = (uint16_t) arg.clear_zstencil;
269 dst += dst_stride;
270 }
271 break;
272 case 4:
273 for (i = 0; i < height; i++) {
274 uint32_t *row = (uint32_t *)dst;
275 for (j = 0; j < width; j++)
276 *row++ = arg.clear_zstencil;
277 dst += dst_stride;
278 }
279 break;
280 default:
281 assert(0);
282 break;
283 }
284 }
285
286
287 /**
288 * Load tile color from the framebuffer surface.
289 * This is a bin command called during bin processing.
290 */
291 void lp_rast_load_color( struct lp_rasterizer *rast,
292 unsigned thread_index,
293 const union lp_rast_cmd_arg arg)
294 {
295 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
296 const unsigned x = task->x;
297 const unsigned y = task->y;
298 unsigned i;
299
300 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
301
302 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
303 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
304 int w = TILE_SIZE;
305 int h = TILE_SIZE;
306
307 if (x >= transfer->width)
308 continue;
309
310 if (y >= transfer->height)
311 continue;
312
313 assert(w >= 0);
314 assert(h >= 0);
315 assert(w <= TILE_SIZE);
316 assert(h <= TILE_SIZE);
317
318 lp_tile_read_4ub(transfer->texture->format,
319 task->tile.color[i],
320 rast->cbuf_map[i],
321 transfer->stride,
322 x, y,
323 w, h);
324
325 LP_COUNT(nr_color_tile_load);
326 }
327 }
328
329
330 void lp_rast_set_state( struct lp_rasterizer *rast,
331 unsigned thread_index,
332 const union lp_rast_cmd_arg arg )
333 {
334 const struct lp_rast_state *state = arg.set_state;
335
336 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
337
338 /* just set the current state pointer for this rasterizer */
339 rast->tasks[thread_index].current_state = state;
340 }
341
342
343
344 /**
345 * Run the shader on all blocks in a tile. This is used when a tile is
346 * completely contained inside a triangle.
347 * This is a bin command called during bin processing.
348 */
349 void lp_rast_shade_tile( struct lp_rasterizer *rast,
350 unsigned thread_index,
351 const union lp_rast_cmd_arg arg )
352 {
353 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
354 const struct lp_rast_state *state = task->current_state;
355 struct lp_rast_tile *tile = &task->tile;
356 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
357 const unsigned tile_x = task->x;
358 const unsigned tile_y = task->y;
359 unsigned x, y;
360
361 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
362
363 /* render the whole 64x64 tile in 4x4 chunks */
364 for (y = 0; y < TILE_SIZE; y += 4){
365 for (x = 0; x < TILE_SIZE; x += 4) {
366 uint8_t *color[PIPE_MAX_COLOR_BUFS];
367 uint32_t *depth;
368 unsigned block_offset, i;
369
370 /* offset of the 16x16 pixel block within the tile */
371 block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
372
373 /* color buffer */
374 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
375 color[i] = tile->color[i] + 4 * block_offset;
376
377 /* depth buffer */
378 depth = lp_rast_depth_pointer(rast, tile_x + x, tile_y + y);
379
380 /* run shader */
381 state->jit_function[0]( &state->jit_context,
382 tile_x + x, tile_y + y,
383 inputs->a0,
384 inputs->dadx,
385 inputs->dady,
386 color,
387 depth,
388 INT_MIN, INT_MIN, INT_MIN,
389 NULL, NULL, NULL );
390 }
391 }
392 }
393
394
395 /**
396 * Compute shading for a 4x4 block of pixels.
397 * This is a bin command called during bin processing.
398 */
399 void lp_rast_shade_quads( struct lp_rasterizer *rast,
400 unsigned thread_index,
401 const struct lp_rast_shader_inputs *inputs,
402 unsigned x, unsigned y,
403 int32_t c1, int32_t c2, int32_t c3)
404 {
405 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
406 const struct lp_rast_state *state = task->current_state;
407 struct lp_rast_tile *tile = &task->tile;
408 uint8_t *color[PIPE_MAX_COLOR_BUFS];
409 void *depth;
410 unsigned i;
411 unsigned ix, iy;
412 int block_offset;
413
414 #ifdef DEBUG
415 assert(state);
416
417 /* Sanity checks */
418 assert(x % TILE_VECTOR_WIDTH == 0);
419 assert(y % TILE_VECTOR_HEIGHT == 0);
420
421 assert((x % 4) == 0);
422 assert((y % 4) == 0);
423 #endif
424
425 ix = x % TILE_SIZE;
426 iy = y % TILE_SIZE;
427
428 /* offset of the 16x16 pixel block within the tile */
429 block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
430
431 /* color buffer */
432 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
433 color[i] = tile->color[i] + 4 * block_offset;
434
435 /* depth buffer */
436 depth = lp_rast_depth_pointer(rast, x, y);
437
438
439
440 #ifdef DEBUG
441 assert(lp_check_alignment(tile->color[0], 16));
442 assert(lp_check_alignment(state->jit_context.blend_color, 16));
443
444 assert(lp_check_alignment(inputs->step[0], 16));
445 assert(lp_check_alignment(inputs->step[1], 16));
446 assert(lp_check_alignment(inputs->step[2], 16));
447 #endif
448
449 /* run shader */
450 state->jit_function[1]( &state->jit_context,
451 x, y,
452 inputs->a0,
453 inputs->dadx,
454 inputs->dady,
455 color,
456 depth,
457 c1, c2, c3,
458 inputs->step[0], inputs->step[1], inputs->step[2]);
459 }
460
461
462 /**
463 * Set top row and left column of the tile's pixels to white. For debugging.
464 */
465 static void
466 outline_tile(uint8_t *tile)
467 {
468 const uint8_t val = 0xff;
469 unsigned i;
470
471 for (i = 0; i < TILE_SIZE; i++) {
472 TILE_PIXEL(tile, i, 0, 0) = val;
473 TILE_PIXEL(tile, i, 0, 1) = val;
474 TILE_PIXEL(tile, i, 0, 2) = val;
475 TILE_PIXEL(tile, i, 0, 3) = val;
476
477 TILE_PIXEL(tile, 0, i, 0) = val;
478 TILE_PIXEL(tile, 0, i, 1) = val;
479 TILE_PIXEL(tile, 0, i, 2) = val;
480 TILE_PIXEL(tile, 0, i, 3) = val;
481 }
482 }
483
484
485 /**
486 * Draw grid of gray lines at 16-pixel intervals across the tile to
487 * show the sub-tile boundaries. For debugging.
488 */
489 static void
490 outline_subtiles(uint8_t *tile)
491 {
492 const uint8_t val = 0x80;
493 const unsigned step = 16;
494 unsigned i, j;
495
496 for (i = 0; i < TILE_SIZE; i += step) {
497 for (j = 0; j < TILE_SIZE; j++) {
498 TILE_PIXEL(tile, i, j, 0) = val;
499 TILE_PIXEL(tile, i, j, 1) = val;
500 TILE_PIXEL(tile, i, j, 2) = val;
501 TILE_PIXEL(tile, i, j, 3) = val;
502
503 TILE_PIXEL(tile, j, i, 0) = val;
504 TILE_PIXEL(tile, j, i, 1) = val;
505 TILE_PIXEL(tile, j, i, 2) = val;
506 TILE_PIXEL(tile, j, i, 3) = val;
507 }
508 }
509
510 outline_tile(tile);
511 }
512
513
514
515 /**
516 * Write the rasterizer's color tile to the framebuffer.
517 */
518 static void lp_rast_store_color( struct lp_rasterizer *rast,
519 unsigned thread_index)
520 {
521 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
522 const unsigned x = task->x;
523 const unsigned y = task->y;
524 unsigned i;
525
526 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
527 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
528 int w = TILE_SIZE;
529 int h = TILE_SIZE;
530
531 if (x >= transfer->width)
532 continue;
533
534 if (y >= transfer->height)
535 continue;
536
537 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
538 thread_index, x, y, w, h);
539
540 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
541 outline_subtiles(task->tile.color[i]);
542 else if (LP_DEBUG & DEBUG_SHOW_TILES)
543 outline_tile(task->tile.color[i]);
544
545 lp_tile_write_4ub(transfer->texture->format,
546 task->tile.color[i],
547 rast->cbuf_map[i],
548 transfer->stride,
549 x, y,
550 w, h);
551
552 LP_COUNT(nr_color_tile_store);
553 }
554 }
555
556
557 /**
558 * Write the rasterizer's tiles to the framebuffer.
559 */
560 static void
561 lp_rast_end_tile( struct lp_rasterizer *rast,
562 unsigned thread_index )
563 {
564 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
565
566 if (rast->state.write_color)
567 lp_rast_store_color(rast, thread_index);
568 }
569
570
571 /**
572 * Signal on a fence. This is called during bin execution/rasterization.
573 * Called per thread.
574 */
575 void lp_rast_fence( struct lp_rasterizer *rast,
576 unsigned thread_index,
577 const union lp_rast_cmd_arg arg )
578 {
579 struct lp_fence *fence = arg.fence;
580
581 pipe_mutex_lock( fence->mutex );
582
583 fence->count++;
584 assert(fence->count <= fence->rank);
585
586 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
587 fence->count, fence->rank);
588
589 pipe_condvar_signal( fence->signalled );
590
591 pipe_mutex_unlock( fence->mutex );
592 }
593
594
595 /**
596 * When all the threads are done rasterizing a scene, one thread will
597 * call this function to reset the scene and put it onto the empty queue.
598 */
599 static void
600 release_scene( struct lp_rasterizer *rast,
601 struct lp_scene *scene )
602 {
603 util_unreference_framebuffer_state( &scene->fb );
604
605 lp_scene_reset( scene );
606 lp_scene_enqueue( rast->empty_scenes, scene );
607 rast->curr_scene = NULL;
608 }
609
610
611 /**
612 * Rasterize commands for a single bin.
613 * \param x, y position of the bin's tile in the framebuffer
614 * Must be called between lp_rast_begin() and lp_rast_end().
615 * Called per thread.
616 */
617 static void
618 rasterize_bin( struct lp_rasterizer *rast,
619 unsigned thread_index,
620 const struct cmd_bin *bin,
621 int x, int y)
622 {
623 const struct cmd_block_list *commands = &bin->commands;
624 struct cmd_block *block;
625 unsigned k;
626
627 lp_rast_start_tile( rast, thread_index, x, y );
628
629 /* simply execute each of the commands in the block list */
630 for (block = commands->head; block; block = block->next) {
631 for (k = 0; k < block->count; k++) {
632 block->cmd[k]( rast, thread_index, block->arg[k] );
633 }
634 }
635
636 lp_rast_end_tile( rast, thread_index );
637 }
638
639
640 #define RAST(x) { lp_rast_##x, #x }
641
642 static struct {
643 lp_rast_cmd cmd;
644 const char *name;
645 } cmd_names[] =
646 {
647 RAST(load_color),
648 RAST(clear_color),
649 RAST(clear_zstencil),
650 RAST(triangle),
651 RAST(shade_tile),
652 RAST(set_state),
653 RAST(fence),
654 };
655
656 static void
657 debug_bin( const struct cmd_bin *bin )
658 {
659 const struct cmd_block *head = bin->commands.head;
660 int i, j;
661
662 for (i = 0; i < head->count; i++) {
663 debug_printf("%d: ", i);
664 for (j = 0; j < Elements(cmd_names); j++) {
665 if (head->cmd[i] == cmd_names[j].cmd) {
666 debug_printf("%s\n", cmd_names[j].name);
667 break;
668 }
669 }
670 if (j == Elements(cmd_names))
671 debug_printf("...other\n");
672 }
673
674 }
675
676 /* An empty bin is one that just loads the contents of the tile and
677 * stores them again unchanged. This typically happens when bins have
678 * been flushed for some reason in the middle of a frame, or when
679 * incremental updates are being made to a render target.
680 *
681 * Try to avoid doing pointless work in this case.
682 */
683 static boolean
684 is_empty_bin( const struct cmd_bin *bin )
685 {
686 const struct cmd_block *head = bin->commands.head;
687 int i;
688
689 if (0)
690 debug_bin(bin);
691
692 /* We emit at most two load-tile commands at the start of the first
693 * command block. In addition we seem to emit a couple of
694 * set-state commands even in empty bins.
695 *
696 * As a heuristic, if a bin has more than 4 commands, consider it
697 * non-empty.
698 */
699 if (head->next != NULL ||
700 head->count > 4) {
701 return FALSE;
702 }
703
704 for (i = 0; i < head->count; i++)
705 if (head->cmd[i] != lp_rast_load_color &&
706 head->cmd[i] != lp_rast_set_state) {
707 return FALSE;
708 }
709
710 return TRUE;
711 }
712
713
714
715 /**
716 * Rasterize/execute all bins within a scene.
717 * Called per thread.
718 */
719 static void
720 rasterize_scene( struct lp_rasterizer *rast,
721 unsigned thread_index,
722 struct lp_scene *scene,
723 bool write_depth )
724 {
725 /* loop over scene bins, rasterize each */
726 #if 0
727 {
728 unsigned i, j;
729 for (i = 0; i < scene->tiles_x; i++) {
730 for (j = 0; j < scene->tiles_y; j++) {
731 struct cmd_bin *bin = lp_get_bin(scene, i, j);
732 rasterize_bin( rast, thread_index,
733 bin, i * TILE_SIZE, j * TILE_SIZE );
734 }
735 }
736 }
737 #else
738 {
739 struct cmd_bin *bin;
740 int x, y;
741
742 assert(scene);
743 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
744 if (!is_empty_bin( bin ))
745 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
746 }
747 }
748 #endif
749 }
750
751
752 /**
753 * Called by setup module when it has something for us to render.
754 */
755 void
756 lp_rasterize_scene( struct lp_rasterizer *rast,
757 struct lp_scene *scene,
758 const struct pipe_framebuffer_state *fb,
759 bool write_depth )
760 {
761 boolean debug = false;
762
763 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
764
765 if (debug) {
766 unsigned x, y;
767 debug_printf("rasterize scene:\n");
768 debug_printf(" data size: %u\n", lp_scene_data_size(scene));
769 for (y = 0; y < scene->tiles_y; y++) {
770 for (x = 0; x < scene->tiles_x; x++) {
771 debug_printf(" bin %u, %u size: %u\n", x, y,
772 lp_scene_bin_size(scene, x, y));
773 }
774 }
775 }
776
777 /* save framebuffer state in the bin */
778 util_copy_framebuffer_state(&scene->fb, fb);
779 scene->write_depth = write_depth;
780
781 if (rast->num_threads == 0) {
782 /* no threading */
783
784 lp_rast_begin( rast, fb,
785 fb->nr_cbufs != 0, /* always write color if cbufs present */
786 fb->zsbuf != NULL && write_depth );
787
788 lp_scene_bin_iter_begin( scene );
789 rasterize_scene( rast, 0, scene, write_depth );
790
791 release_scene( rast, scene );
792
793 lp_rast_end( rast );
794 }
795 else {
796 /* threaded rendering! */
797 unsigned i;
798
799 lp_scene_enqueue( rast->full_scenes, scene );
800
801 /* signal the threads that there's work to do */
802 for (i = 0; i < rast->num_threads; i++) {
803 pipe_semaphore_signal(&rast->tasks[i].work_ready);
804 }
805
806 /* wait for work to complete */
807 for (i = 0; i < rast->num_threads; i++) {
808 pipe_semaphore_wait(&rast->tasks[i].work_done);
809 }
810 }
811
812 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
813 }
814
815
816 /**
817 * This is the thread's main entrypoint.
818 * It's a simple loop:
819 * 1. wait for work
820 * 2. do work
821 * 3. signal that we're done
822 */
823 static PIPE_THREAD_ROUTINE( thread_func, init_data )
824 {
825 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
826 struct lp_rasterizer *rast = task->rast;
827 boolean debug = false;
828
829 while (1) {
830 /* wait for work */
831 if (debug)
832 debug_printf("thread %d waiting for work\n", task->thread_index);
833 pipe_semaphore_wait(&task->work_ready);
834
835 if (task->thread_index == 0) {
836 /* thread[0]:
837 * - get next scene to rasterize
838 * - map the framebuffer surfaces
839 */
840 const struct pipe_framebuffer_state *fb;
841 boolean write_depth;
842
843 rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE );
844
845 lp_scene_bin_iter_begin( rast->curr_scene );
846
847 fb = &rast->curr_scene->fb;
848 write_depth = rast->curr_scene->write_depth;
849
850 lp_rast_begin( rast, fb,
851 fb->nr_cbufs != 0,
852 fb->zsbuf != NULL && write_depth );
853 }
854
855 /* Wait for all threads to get here so that threads[1+] don't
856 * get a null rast->curr_scene pointer.
857 */
858 pipe_barrier_wait( &rast->barrier );
859
860 /* do work */
861 if (debug)
862 debug_printf("thread %d doing work\n", task->thread_index);
863 rasterize_scene(rast,
864 task->thread_index,
865 rast->curr_scene,
866 rast->curr_scene->write_depth);
867
868 /* wait for all threads to finish with this scene */
869 pipe_barrier_wait( &rast->barrier );
870
871 if (task->thread_index == 0) {
872 /* thread[0]:
873 * - release the scene object
874 * - unmap the framebuffer surfaces
875 */
876 release_scene( rast, rast->curr_scene );
877 lp_rast_end( rast );
878 }
879
880 /* signal done with work */
881 if (debug)
882 debug_printf("thread %d done working\n", task->thread_index);
883 pipe_semaphore_signal(&task->work_done);
884 }
885
886 return NULL;
887 }
888
889
890 /**
891 * Initialize semaphores and spawn the threads.
892 */
893 static void
894 create_rast_threads(struct lp_rasterizer *rast)
895 {
896 unsigned i;
897
898 #ifdef PIPE_OS_WINDOWS
899 /* Multithreading not supported on windows until conditions and barriers are
900 * properly implemented. */
901 rast->num_threads = 0;
902 #else
903 rast->num_threads = util_cpu_caps.nr_cpus;
904 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
905 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
906 #endif
907
908 /* NOTE: if num_threads is zero, we won't use any threads */
909 for (i = 0; i < rast->num_threads; i++) {
910 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
911 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
912 rast->threads[i] = pipe_thread_create(thread_func,
913 (void *) &rast->tasks[i]);
914 }
915 }
916
917
918
919 /**
920 * Create new lp_rasterizer.
921 * \param empty the queue to put empty scenes on after we've finished
922 * processing them.
923 */
924 struct lp_rasterizer *
925 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
926 {
927 struct lp_rasterizer *rast;
928 unsigned i, cbuf;
929
930 rast = CALLOC_STRUCT(lp_rasterizer);
931 if(!rast)
932 return NULL;
933
934 rast->screen = screen;
935
936 rast->empty_scenes = empty;
937 rast->full_scenes = lp_scene_queue_create();
938
939 for (i = 0; i < Elements(rast->tasks); i++) {
940 struct lp_rasterizer_task *task = &rast->tasks[i];
941
942 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
943 task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16);
944
945 task->rast = rast;
946 task->thread_index = i;
947 }
948
949 create_rast_threads(rast);
950
951 /* for synchronizing rasterization threads */
952 pipe_barrier_init( &rast->barrier, rast->num_threads );
953
954 return rast;
955 }
956
957
958 /* Shutdown:
959 */
960 void lp_rast_destroy( struct lp_rasterizer *rast )
961 {
962 unsigned i, cbuf;
963
964 util_unreference_framebuffer_state(&rast->state.fb);
965
966 for (i = 0; i < Elements(rast->tasks); i++) {
967 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
968 align_free(rast->tasks[i].tile.color[cbuf]);
969 }
970
971 /* for synchronizing rasterization threads */
972 pipe_barrier_destroy( &rast->barrier );
973
974 FREE(rast);
975 }
976
977
978 /** Return number of rasterization threads */
979 unsigned
980 lp_rast_get_num_threads( struct lp_rasterizer *rast )
981 {
982 return rast->num_threads;
983 }