llvmpipe: Fix breakage.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_perf.h"
38 #include "lp_rast.h"
39 #include "lp_rast_priv.h"
40 #include "lp_tile_soa.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_scene.h"
43
44
45 /**
46 * Begin the rasterization phase.
47 * Map the framebuffer surfaces. Initialize the 'rast' state.
48 */
49 static boolean
50 lp_rast_begin( struct lp_rasterizer *rast,
51 const struct pipe_framebuffer_state *fb,
52 boolean write_color,
53 boolean write_zstencil )
54 {
55 struct pipe_screen *screen = rast->screen;
56 struct pipe_surface *cbuf, *zsbuf;
57 int i;
58
59 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
60
61 util_copy_framebuffer_state(&rast->state.fb, fb);
62
63 rast->state.write_zstencil = write_zstencil;
64 rast->state.write_color = write_color;
65
66 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
67 fb->height % TILE_SIZE != 0);
68
69
70 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
71 cbuf = rast->state.fb.cbufs[i];
72 if (cbuf) {
73 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
74 cbuf->texture,
75 cbuf->face,
76 cbuf->level,
77 cbuf->zslice,
78 PIPE_TRANSFER_READ_WRITE,
79 0, 0,
80 cbuf->width,
81 cbuf->height);
82 if (!rast->cbuf_transfer[i])
83 goto fail;
84
85 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
86 rast->cbuf_transfer[i]);
87 if (!rast->cbuf_map[i])
88 goto fail;
89 }
90 }
91
92 zsbuf = rast->state.fb.zsbuf;
93 if (zsbuf) {
94 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
95 zsbuf->texture,
96 zsbuf->face,
97 zsbuf->level,
98 zsbuf->zslice,
99 PIPE_TRANSFER_READ_WRITE,
100 0, 0,
101 zsbuf->width,
102 zsbuf->height);
103 if (!rast->zsbuf_transfer)
104 goto fail;
105
106 rast->zsbuf_map = screen->transfer_map(rast->screen,
107 rast->zsbuf_transfer);
108 if (!rast->zsbuf_map)
109 goto fail;
110 }
111
112 return TRUE;
113
114 fail:
115 /* Unmap and release transfers?
116 */
117 return FALSE;
118 }
119
120
121 /**
122 * Finish the rasterization phase.
123 * Unmap framebuffer surfaces.
124 */
125 static void
126 lp_rast_end( struct lp_rasterizer *rast )
127 {
128 struct pipe_screen *screen = rast->screen;
129 unsigned i;
130
131 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
132 if (rast->cbuf_map[i])
133 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
134
135 if (rast->cbuf_transfer[i])
136 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
137
138 rast->cbuf_transfer[i] = NULL;
139 rast->cbuf_map[i] = NULL;
140 }
141
142 if (rast->zsbuf_map)
143 screen->transfer_unmap(screen, rast->zsbuf_transfer);
144
145 if (rast->zsbuf_transfer)
146 screen->tex_transfer_destroy(rast->zsbuf_transfer);
147
148 rast->zsbuf_transfer = NULL;
149 rast->zsbuf_map = NULL;
150 }
151
152
153 /**
154 * Begining rasterization of a tile.
155 * \param x window X position of the tile, in pixels
156 * \param y window Y position of the tile, in pixels
157 */
158 static void
159 lp_rast_start_tile( struct lp_rasterizer *rast,
160 unsigned thread_index,
161 unsigned x, unsigned y )
162 {
163 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
164
165 rast->tasks[thread_index].x = x;
166 rast->tasks[thread_index].y = y;
167 }
168
169
170 /**
171 * Clear the rasterizer's current color tile.
172 * This is a bin command called during bin processing.
173 */
174 void lp_rast_clear_color( struct lp_rasterizer *rast,
175 unsigned thread_index,
176 const union lp_rast_cmd_arg arg )
177 {
178 const uint8_t *clear_color = arg.clear_color;
179 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
180 unsigned i;
181
182 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
183 clear_color[0],
184 clear_color[1],
185 clear_color[2],
186 clear_color[3]);
187
188 if (clear_color[0] == clear_color[1] &&
189 clear_color[1] == clear_color[2] &&
190 clear_color[2] == clear_color[3]) {
191 /* clear to grayscale value {x, x, x, x} */
192 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
193 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
194 }
195 }
196 else {
197 /* Non-gray color.
198 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
199 * will need to change. It'll be pretty obvious when clearing no longer
200 * works.
201 */
202 const unsigned chunk = TILE_SIZE / 4;
203 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
204 uint8_t *c = color_tile[i];
205 unsigned j;
206 for (j = 0; j < 4 * TILE_SIZE; j++) {
207 memset(c, clear_color[0], chunk);
208 c += chunk;
209 memset(c, clear_color[1], chunk);
210 c += chunk;
211 memset(c, clear_color[2], chunk);
212 c += chunk;
213 memset(c, clear_color[3], chunk);
214 c += chunk;
215 }
216 assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4);
217 }
218 }
219
220 LP_COUNT(nr_color_tile_clear);
221 }
222
223
224 /**
225 * Clear the rasterizer's current z/stencil tile.
226 * This is a bin command called during bin processing.
227 */
228 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
229 unsigned thread_index,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
233 const unsigned tile_x = task->x;
234 const unsigned tile_y = task->y;
235 const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT;
236 const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT;
237 unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format);
238 uint8_t *dst;
239 unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT;
240 unsigned i, j;
241
242 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
243
244 assert(rast->zsbuf_map);
245 if (!rast->zsbuf_map)
246 return;
247
248 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
249
250 /*
251 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
252 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
253 *
254 * The swizzled depth format is such that the depths for
255 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
256 */
257
258 dst = lp_rast_depth_pointer(rast, tile_x, tile_y);
259
260 switch (block_size) {
261 case 1:
262 memset(dst, (uint8_t) arg.clear_zstencil, height * width);
263 break;
264 case 2:
265 for (i = 0; i < height; i++) {
266 uint16_t *row = (uint16_t *)dst;
267 for (j = 0; j < width; j++)
268 *row++ = (uint16_t) arg.clear_zstencil;
269 dst += dst_stride;
270 }
271 break;
272 case 4:
273 for (i = 0; i < height; i++) {
274 uint32_t *row = (uint32_t *)dst;
275 for (j = 0; j < width; j++)
276 *row++ = arg.clear_zstencil;
277 dst += dst_stride;
278 }
279 break;
280 default:
281 assert(0);
282 break;
283 }
284 }
285
286
287 /**
288 * Load tile color from the framebuffer surface.
289 * This is a bin command called during bin processing.
290 */
291 void lp_rast_load_color( struct lp_rasterizer *rast,
292 unsigned thread_index,
293 const union lp_rast_cmd_arg arg)
294 {
295 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
296 const unsigned x = task->x;
297 const unsigned y = task->y;
298 unsigned i;
299
300 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
301
302 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
303 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
304 int w = TILE_SIZE;
305 int h = TILE_SIZE;
306
307 if (x >= transfer->width)
308 continue;
309
310 if (y >= transfer->height)
311 continue;
312
313 assert(w >= 0);
314 assert(h >= 0);
315 assert(w <= TILE_SIZE);
316 assert(h <= TILE_SIZE);
317
318 lp_tile_read_4ub(transfer->texture->format,
319 task->tile.color[i],
320 rast->cbuf_map[i],
321 transfer->stride,
322 x, y,
323 w, h);
324
325 LP_COUNT(nr_color_tile_load);
326 }
327 }
328
329
330 void lp_rast_set_state( struct lp_rasterizer *rast,
331 unsigned thread_index,
332 const union lp_rast_cmd_arg arg )
333 {
334 const struct lp_rast_state *state = arg.set_state;
335
336 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
337
338 /* just set the current state pointer for this rasterizer */
339 rast->tasks[thread_index].current_state = state;
340 }
341
342
343
344 /**
345 * Run the shader on all blocks in a tile. This is used when a tile is
346 * completely contained inside a triangle.
347 * This is a bin command called during bin processing.
348 */
349 void lp_rast_shade_tile( struct lp_rasterizer *rast,
350 unsigned thread_index,
351 const union lp_rast_cmd_arg arg )
352 {
353 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
354 const struct lp_rast_state *state = task->current_state;
355 struct lp_rast_tile *tile = &task->tile;
356 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
357 const unsigned tile_x = task->x;
358 const unsigned tile_y = task->y;
359 unsigned x, y;
360
361 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
362
363 /* render the whole 64x64 tile in 4x4 chunks */
364 for (y = 0; y < TILE_SIZE; y += 4){
365 for (x = 0; x < TILE_SIZE; x += 4) {
366 uint8_t *color[PIPE_MAX_COLOR_BUFS];
367 uint32_t *depth;
368 unsigned block_offset, i;
369
370 /* offset of the 16x16 pixel block within the tile */
371 block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
372
373 /* color buffer */
374 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
375 color[i] = tile->color[i] + 4 * block_offset;
376
377 /* depth buffer */
378 depth = lp_rast_depth_pointer(rast, tile_x + x, tile_y + y);
379
380 /* run shader */
381 state->jit_function[0]( &state->jit_context,
382 tile_x + x, tile_y + y,
383 inputs->a0,
384 inputs->dadx,
385 inputs->dady,
386 color,
387 depth,
388 INT_MIN, INT_MIN, INT_MIN,
389 NULL, NULL, NULL );
390 }
391 }
392 }
393
394
395 /**
396 * Compute shading for a 4x4 block of pixels.
397 * This is a bin command called during bin processing.
398 */
399 void lp_rast_shade_quads( struct lp_rasterizer *rast,
400 unsigned thread_index,
401 const struct lp_rast_shader_inputs *inputs,
402 unsigned x, unsigned y,
403 int32_t c1, int32_t c2, int32_t c3)
404 {
405 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
406 const struct lp_rast_state *state = task->current_state;
407 struct lp_rast_tile *tile = &task->tile;
408 uint8_t *color[PIPE_MAX_COLOR_BUFS];
409 void *depth;
410 unsigned i;
411 unsigned ix, iy;
412 int block_offset;
413
414 #ifdef DEBUG
415 assert(state);
416
417 /* Sanity checks */
418 assert(x % TILE_VECTOR_WIDTH == 0);
419 assert(y % TILE_VECTOR_HEIGHT == 0);
420
421 assert((x % 4) == 0);
422 assert((y % 4) == 0);
423 #endif
424
425 ix = x % TILE_SIZE;
426 iy = y % TILE_SIZE;
427
428 /* offset of the 16x16 pixel block within the tile */
429 block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
430
431 /* color buffer */
432 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
433 color[i] = tile->color[i] + 4 * block_offset;
434
435 /* depth buffer */
436 depth = lp_rast_depth_pointer(rast, x, y);
437
438
439
440 #ifdef DEBUG
441 assert(lp_check_alignment(tile->color[0], 16));
442 assert(lp_check_alignment(state->jit_context.blend_color, 16));
443
444 assert(lp_check_alignment(inputs->step[0], 16));
445 assert(lp_check_alignment(inputs->step[1], 16));
446 assert(lp_check_alignment(inputs->step[2], 16));
447 #endif
448
449 /* run shader */
450 state->jit_function[1]( &state->jit_context,
451 x, y,
452 inputs->a0,
453 inputs->dadx,
454 inputs->dady,
455 color,
456 depth,
457 c1, c2, c3,
458 inputs->step[0], inputs->step[1], inputs->step[2]);
459 }
460
461
462 /**
463 * Set top row and left column of the tile's pixels to white. For debugging.
464 */
465 static void
466 outline_tile(uint8_t *tile)
467 {
468 const uint8_t val = 0xff;
469 unsigned i;
470
471 for (i = 0; i < TILE_SIZE; i++) {
472 TILE_PIXEL(tile, i, 0, 0) = val;
473 TILE_PIXEL(tile, i, 0, 1) = val;
474 TILE_PIXEL(tile, i, 0, 2) = val;
475 TILE_PIXEL(tile, i, 0, 3) = val;
476
477 TILE_PIXEL(tile, 0, i, 0) = val;
478 TILE_PIXEL(tile, 0, i, 1) = val;
479 TILE_PIXEL(tile, 0, i, 2) = val;
480 TILE_PIXEL(tile, 0, i, 3) = val;
481 }
482 }
483
484
485 /**
486 * Draw grid of gray lines at 16-pixel intervals across the tile to
487 * show the sub-tile boundaries. For debugging.
488 */
489 static void
490 outline_subtiles(uint8_t *tile)
491 {
492 const uint8_t val = 0x80;
493 const unsigned step = 16;
494 unsigned i, j;
495
496 for (i = 0; i < TILE_SIZE; i += step) {
497 for (j = 0; j < TILE_SIZE; j++) {
498 TILE_PIXEL(tile, i, j, 0) = val;
499 TILE_PIXEL(tile, i, j, 1) = val;
500 TILE_PIXEL(tile, i, j, 2) = val;
501 TILE_PIXEL(tile, i, j, 3) = val;
502
503 TILE_PIXEL(tile, j, i, 0) = val;
504 TILE_PIXEL(tile, j, i, 1) = val;
505 TILE_PIXEL(tile, j, i, 2) = val;
506 TILE_PIXEL(tile, j, i, 3) = val;
507 }
508 }
509
510 outline_tile(tile);
511 }
512
513
514
515 /**
516 * Write the rasterizer's color tile to the framebuffer.
517 */
518 static void lp_rast_store_color( struct lp_rasterizer *rast,
519 unsigned thread_index)
520 {
521 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
522 const unsigned x = task->x;
523 const unsigned y = task->y;
524 unsigned i;
525
526 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
527 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
528 int w = TILE_SIZE;
529 int h = TILE_SIZE;
530
531 if (x >= transfer->width)
532 continue;
533
534 if (y >= transfer->height)
535 continue;
536
537 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
538 thread_index, x, y, w, h);
539
540 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
541 outline_subtiles(task->tile.color[i]);
542 else if (LP_DEBUG & DEBUG_SHOW_TILES)
543 outline_tile(task->tile.color[i]);
544
545 lp_tile_write_4ub(transfer->texture->format,
546 task->tile.color[i],
547 rast->cbuf_map[i],
548 transfer->stride,
549 x, y,
550 w, h);
551
552 LP_COUNT(nr_color_tile_store);
553 }
554 }
555
556
557 /**
558 * Write the rasterizer's tiles to the framebuffer.
559 */
560 static void
561 lp_rast_end_tile( struct lp_rasterizer *rast,
562 unsigned thread_index )
563 {
564 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
565
566 if (rast->state.write_color)
567 lp_rast_store_color(rast, thread_index);
568 }
569
570
571 /**
572 * Signal on a fence. This is called during bin execution/rasterization.
573 * Called per thread.
574 */
575 void lp_rast_fence( struct lp_rasterizer *rast,
576 unsigned thread_index,
577 const union lp_rast_cmd_arg arg )
578 {
579 struct lp_fence *fence = arg.fence;
580
581 pipe_mutex_lock( fence->mutex );
582
583 fence->count++;
584 assert(fence->count <= fence->rank);
585
586 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
587 fence->count, fence->rank);
588
589 pipe_condvar_signal( fence->signalled );
590
591 pipe_mutex_unlock( fence->mutex );
592 }
593
594
595 /**
596 * When all the threads are done rasterizing a scene, one thread will
597 * call this function to reset the scene and put it onto the empty queue.
598 */
599 static void
600 release_scene( struct lp_rasterizer *rast,
601 struct lp_scene *scene )
602 {
603 util_unreference_framebuffer_state( &scene->fb );
604
605 lp_scene_reset( scene );
606
607 assert(lp_scene_is_empty(scene));
608
609 lp_scene_enqueue( rast->empty_scenes, scene );
610 rast->curr_scene = NULL;
611 }
612
613
614 /**
615 * Rasterize commands for a single bin.
616 * \param x, y position of the bin's tile in the framebuffer
617 * Must be called between lp_rast_begin() and lp_rast_end().
618 * Called per thread.
619 */
620 static void
621 rasterize_bin( struct lp_rasterizer *rast,
622 unsigned thread_index,
623 const struct cmd_bin *bin,
624 int x, int y)
625 {
626 const struct cmd_block_list *commands = &bin->commands;
627 struct cmd_block *block;
628 unsigned k;
629
630 lp_rast_start_tile( rast, thread_index, x, y );
631
632 /* simply execute each of the commands in the block list */
633 for (block = commands->head; block; block = block->next) {
634 for (k = 0; k < block->count; k++) {
635 block->cmd[k]( rast, thread_index, block->arg[k] );
636 }
637 }
638
639 lp_rast_end_tile( rast, thread_index );
640 }
641
642
643 #define RAST(x) { lp_rast_##x, #x }
644
645 static struct {
646 lp_rast_cmd cmd;
647 const char *name;
648 } cmd_names[] =
649 {
650 RAST(load_color),
651 RAST(clear_color),
652 RAST(clear_zstencil),
653 RAST(triangle),
654 RAST(shade_tile),
655 RAST(set_state),
656 RAST(fence),
657 };
658
659 static void
660 debug_bin( const struct cmd_bin *bin )
661 {
662 const struct cmd_block *head = bin->commands.head;
663 int i, j;
664
665 for (i = 0; i < head->count; i++) {
666 debug_printf("%d: ", i);
667 for (j = 0; j < Elements(cmd_names); j++) {
668 if (head->cmd[i] == cmd_names[j].cmd) {
669 debug_printf("%s\n", cmd_names[j].name);
670 break;
671 }
672 }
673 if (j == Elements(cmd_names))
674 debug_printf("...other\n");
675 }
676
677 }
678
679 /* An empty bin is one that just loads the contents of the tile and
680 * stores them again unchanged. This typically happens when bins have
681 * been flushed for some reason in the middle of a frame, or when
682 * incremental updates are being made to a render target.
683 *
684 * Try to avoid doing pointless work in this case.
685 */
686 static boolean
687 is_empty_bin( const struct cmd_bin *bin )
688 {
689 const struct cmd_block *head = bin->commands.head;
690 int i;
691
692 if (0)
693 debug_bin(bin);
694
695 /* We emit at most two load-tile commands at the start of the first
696 * command block. In addition we seem to emit a couple of
697 * set-state commands even in empty bins.
698 *
699 * As a heuristic, if a bin has more than 4 commands, consider it
700 * non-empty.
701 */
702 if (head->next != NULL ||
703 head->count > 4) {
704 return FALSE;
705 }
706
707 for (i = 0; i < head->count; i++)
708 if (head->cmd[i] != lp_rast_load_color &&
709 head->cmd[i] != lp_rast_set_state) {
710 return FALSE;
711 }
712
713 return TRUE;
714 }
715
716
717
718 /**
719 * Rasterize/execute all bins within a scene.
720 * Called per thread.
721 */
722 static void
723 rasterize_scene( struct lp_rasterizer *rast,
724 unsigned thread_index,
725 struct lp_scene *scene,
726 bool write_depth )
727 {
728 /* loop over scene bins, rasterize each */
729 #if 0
730 {
731 unsigned i, j;
732 for (i = 0; i < scene->tiles_x; i++) {
733 for (j = 0; j < scene->tiles_y; j++) {
734 struct cmd_bin *bin = lp_get_bin(scene, i, j);
735 rasterize_bin( rast, thread_index,
736 bin, i * TILE_SIZE, j * TILE_SIZE );
737 }
738 }
739 }
740 #else
741 {
742 struct cmd_bin *bin;
743 int x, y;
744
745 assert(scene);
746 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
747 if (!is_empty_bin( bin ))
748 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
749 }
750 }
751 #endif
752 }
753
754
755 /**
756 * Called by setup module when it has something for us to render.
757 */
758 void
759 lp_rasterize_scene( struct lp_rasterizer *rast,
760 struct lp_scene *scene,
761 const struct pipe_framebuffer_state *fb,
762 bool write_depth )
763 {
764 boolean debug = false;
765
766 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
767
768 if (debug) {
769 unsigned x, y;
770 debug_printf("rasterize scene:\n");
771 debug_printf(" data size: %u\n", lp_scene_data_size(scene));
772 for (y = 0; y < scene->tiles_y; y++) {
773 for (x = 0; x < scene->tiles_x; x++) {
774 debug_printf(" bin %u, %u size: %u\n", x, y,
775 lp_scene_bin_size(scene, x, y));
776 }
777 }
778 }
779
780 /* save framebuffer state in the bin */
781 util_copy_framebuffer_state(&scene->fb, fb);
782 scene->write_depth = write_depth;
783
784 if (rast->num_threads == 0) {
785 /* no threading */
786
787 lp_rast_begin( rast, fb,
788 fb->nr_cbufs != 0, /* always write color if cbufs present */
789 fb->zsbuf != NULL && write_depth );
790
791 lp_scene_bin_iter_begin( scene );
792 rasterize_scene( rast, 0, scene, write_depth );
793
794 release_scene( rast, scene );
795
796 lp_rast_end( rast );
797 }
798 else {
799 /* threaded rendering! */
800 unsigned i;
801
802 lp_scene_enqueue( rast->full_scenes, scene );
803
804 /* signal the threads that there's work to do */
805 for (i = 0; i < rast->num_threads; i++) {
806 pipe_semaphore_signal(&rast->tasks[i].work_ready);
807 }
808
809 /* wait for work to complete */
810 for (i = 0; i < rast->num_threads; i++) {
811 pipe_semaphore_wait(&rast->tasks[i].work_done);
812 }
813 }
814
815 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
816 }
817
818
819 /**
820 * This is the thread's main entrypoint.
821 * It's a simple loop:
822 * 1. wait for work
823 * 2. do work
824 * 3. signal that we're done
825 */
826 static PIPE_THREAD_ROUTINE( thread_func, init_data )
827 {
828 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
829 struct lp_rasterizer *rast = task->rast;
830 boolean debug = false;
831
832 while (1) {
833 /* wait for work */
834 if (debug)
835 debug_printf("thread %d waiting for work\n", task->thread_index);
836 pipe_semaphore_wait(&task->work_ready);
837
838 if (task->thread_index == 0) {
839 /* thread[0]:
840 * - get next scene to rasterize
841 * - map the framebuffer surfaces
842 */
843 const struct pipe_framebuffer_state *fb;
844 boolean write_depth;
845
846 rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE );
847
848 lp_scene_bin_iter_begin( rast->curr_scene );
849
850 fb = &rast->curr_scene->fb;
851 write_depth = rast->curr_scene->write_depth;
852
853 lp_rast_begin( rast, fb,
854 fb->nr_cbufs != 0,
855 fb->zsbuf != NULL && write_depth );
856 }
857
858 /* Wait for all threads to get here so that threads[1+] don't
859 * get a null rast->curr_scene pointer.
860 */
861 pipe_barrier_wait( &rast->barrier );
862
863 /* do work */
864 if (debug)
865 debug_printf("thread %d doing work\n", task->thread_index);
866 rasterize_scene(rast,
867 task->thread_index,
868 rast->curr_scene,
869 rast->curr_scene->write_depth);
870
871 /* wait for all threads to finish with this scene */
872 pipe_barrier_wait( &rast->barrier );
873
874 if (task->thread_index == 0) {
875 /* thread[0]:
876 * - release the scene object
877 * - unmap the framebuffer surfaces
878 */
879 release_scene( rast, rast->curr_scene );
880 lp_rast_end( rast );
881 }
882
883 /* signal done with work */
884 if (debug)
885 debug_printf("thread %d done working\n", task->thread_index);
886 pipe_semaphore_signal(&task->work_done);
887 }
888
889 return NULL;
890 }
891
892
893 /**
894 * Initialize semaphores and spawn the threads.
895 */
896 static void
897 create_rast_threads(struct lp_rasterizer *rast)
898 {
899 unsigned i;
900
901 #ifdef PIPE_OS_WINDOWS
902 /* Multithreading not supported on windows until conditions and barriers are
903 * properly implemented. */
904 rast->num_threads = 0;
905 #else
906 rast->num_threads = util_cpu_caps.nr_cpus;
907 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
908 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
909 #endif
910
911 /* NOTE: if num_threads is zero, we won't use any threads */
912 for (i = 0; i < rast->num_threads; i++) {
913 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
914 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
915 rast->threads[i] = pipe_thread_create(thread_func,
916 (void *) &rast->tasks[i]);
917 }
918 }
919
920
921
922 /**
923 * Create new lp_rasterizer.
924 * \param empty the queue to put empty scenes on after we've finished
925 * processing them.
926 */
927 struct lp_rasterizer *
928 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
929 {
930 struct lp_rasterizer *rast;
931 unsigned i, cbuf;
932
933 rast = CALLOC_STRUCT(lp_rasterizer);
934 if(!rast)
935 return NULL;
936
937 rast->screen = screen;
938
939 rast->empty_scenes = empty;
940 rast->full_scenes = lp_scene_queue_create();
941
942 for (i = 0; i < Elements(rast->tasks); i++) {
943 struct lp_rasterizer_task *task = &rast->tasks[i];
944
945 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
946 task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16);
947
948 task->rast = rast;
949 task->thread_index = i;
950 }
951
952 create_rast_threads(rast);
953
954 /* for synchronizing rasterization threads */
955 pipe_barrier_init( &rast->barrier, rast->num_threads );
956
957 return rast;
958 }
959
960
961 /* Shutdown:
962 */
963 void lp_rast_destroy( struct lp_rasterizer *rast )
964 {
965 unsigned i, cbuf;
966
967 util_unreference_framebuffer_state(&rast->state.fb);
968
969 for (i = 0; i < Elements(rast->tasks); i++) {
970 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
971 align_free(rast->tasks[i].tile.color[cbuf]);
972 }
973
974 /* for synchronizing rasterization threads */
975 pipe_barrier_destroy( &rast->barrier );
976
977 FREE(rast);
978 }
979
980
981 /** Return number of rasterization threads */
982 unsigned
983 lp_rast_get_num_threads( struct lp_rasterizer *rast )
984 {
985 return rast->num_threads;
986 }