6c7ece9fdbf969a08abc7d8aea49c0da4704c02e
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_rast.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
41 #include "lp_scene.h"
42
43
44 /**
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
47 */
48 static boolean
49 lp_rast_begin( struct lp_rasterizer *rast,
50 const struct pipe_framebuffer_state *fb,
51 boolean write_color,
52 boolean write_zstencil )
53 {
54 struct pipe_screen *screen = rast->screen;
55 struct pipe_surface *cbuf, *zsbuf;
56 int i;
57
58 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
59
60 util_copy_framebuffer_state(&rast->state.fb, fb);
61
62 rast->state.write_zstencil = write_zstencil;
63 rast->state.write_color = write_color;
64
65 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
66 fb->height % TILE_SIZE != 0);
67
68
69 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
70 cbuf = rast->state.fb.cbufs[i];
71 if (cbuf) {
72 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
73 cbuf->texture,
74 cbuf->face,
75 cbuf->level,
76 cbuf->zslice,
77 PIPE_TRANSFER_READ_WRITE,
78 0, 0,
79 cbuf->width,
80 cbuf->height);
81 if (!rast->cbuf_transfer[i])
82 goto fail;
83
84 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
85 rast->cbuf_transfer[i]);
86 if (!rast->cbuf_map[i])
87 goto fail;
88 }
89 }
90
91 zsbuf = rast->state.fb.zsbuf;
92 if (zsbuf) {
93 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
94 zsbuf->texture,
95 zsbuf->face,
96 zsbuf->level,
97 zsbuf->zslice,
98 PIPE_TRANSFER_READ_WRITE,
99 0, 0,
100 zsbuf->width,
101 zsbuf->height);
102 if (!rast->zsbuf_transfer)
103 goto fail;
104
105 rast->zsbuf_map = screen->transfer_map(rast->screen,
106 rast->zsbuf_transfer);
107 if (!rast->zsbuf_map)
108 goto fail;
109 }
110
111 return TRUE;
112
113 fail:
114 /* Unmap and release transfers?
115 */
116 return FALSE;
117 }
118
119
120 /**
121 * Finish the rasterization phase.
122 * Unmap framebuffer surfaces.
123 */
124 static void
125 lp_rast_end( struct lp_rasterizer *rast )
126 {
127 struct pipe_screen *screen = rast->screen;
128 unsigned i;
129
130 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
131 if (rast->cbuf_map[i])
132 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
133
134 if (rast->cbuf_transfer[i])
135 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
136
137 rast->cbuf_transfer[i] = NULL;
138 rast->cbuf_map[i] = NULL;
139 }
140
141 if (rast->zsbuf_map)
142 screen->transfer_unmap(screen, rast->zsbuf_transfer);
143
144 if (rast->zsbuf_transfer)
145 screen->tex_transfer_destroy(rast->zsbuf_transfer);
146
147 rast->zsbuf_transfer = NULL;
148 rast->zsbuf_map = NULL;
149 }
150
151
152 /**
153 * Begining rasterization of a tile.
154 * \param x window X position of the tile, in pixels
155 * \param y window Y position of the tile, in pixels
156 */
157 static void
158 lp_rast_start_tile( struct lp_rasterizer *rast,
159 unsigned thread_index,
160 unsigned x, unsigned y )
161 {
162 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
163
164 rast->tasks[thread_index].x = x;
165 rast->tasks[thread_index].y = y;
166 }
167
168
169 /**
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
172 */
173 void lp_rast_clear_color( struct lp_rasterizer *rast,
174 unsigned thread_index,
175 const union lp_rast_cmd_arg arg )
176 {
177 const uint8_t *clear_color = arg.clear_color;
178 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
179 unsigned i;
180
181 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
182 clear_color[0],
183 clear_color[1],
184 clear_color[2],
185 clear_color[3]);
186
187 if (clear_color[0] == clear_color[1] &&
188 clear_color[1] == clear_color[2] &&
189 clear_color[2] == clear_color[3]) {
190 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
191 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
192 }
193 }
194 else {
195 unsigned x, y, chan;
196 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
197 for (y = 0; y < TILE_SIZE; y++)
198 for (x = 0; x < TILE_SIZE; x++)
199 for (chan = 0; chan < 4; ++chan)
200 TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan];
201 }
202 }
203
204
205 /**
206 * Clear the rasterizer's current z/stencil tile.
207 * This is a bin command called during bin processing.
208 */
209 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
210 unsigned thread_index,
211 const union lp_rast_cmd_arg arg)
212 {
213 unsigned i, j;
214 uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
215
216 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
217
218 for (i = 0; i < TILE_SIZE; i++)
219 for (j = 0; j < TILE_SIZE; j++)
220 depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil;
221 }
222
223
224 /**
225 * Load tile color from the framebuffer surface.
226 * This is a bin command called during bin processing.
227 */
228 void lp_rast_load_color( struct lp_rasterizer *rast,
229 unsigned thread_index,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
233 const unsigned x = task->x;
234 const unsigned y = task->y;
235 unsigned i;
236
237 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
238
239 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
240 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
241 int w = TILE_SIZE;
242 int h = TILE_SIZE;
243
244 if (x >= transfer->width)
245 continue;
246
247 if (y >= transfer->height)
248 continue;
249 /* XXX: require tile-size aligned render target dimensions:
250 */
251 if (x + w > transfer->width)
252 w -= x + w - transfer->width;
253
254 if (y + h > transfer->height)
255 h -= y + h - transfer->height;
256
257 assert(w >= 0);
258 assert(h >= 0);
259 assert(w <= TILE_SIZE);
260 assert(h <= TILE_SIZE);
261
262 lp_tile_read_4ub(transfer->texture->format,
263 rast->tasks[thread_index].tile.color[i],
264 rast->cbuf_map[i],
265 transfer->stride,
266 x, y,
267 w, h);
268 }
269 }
270
271
/**
 * Copy a w x h rectangle of 32-bit values out of a mapped surface into a
 * tile buffer.
 *
 * \param tile        destination; receives h rows of w values, packed
 *                    back to back (row pitch is w, not the tile size)
 * \param map         start of the mapped surface
 * \param map_stride  distance in bytes between surface rows
 * \param x0, y0      position of the rectangle in the surface, in pixels
 * \param w, h        size of the rectangle, in pixels
 *
 * Rows are moved with memcpy() so that no uint32_t pointer is formed from
 * the byte-typed mapping (no alignment or aliasing assumptions, and the
 * const qualifier of 'map' is kept).
 */
static void
lp_tile_read_z32(uint32_t *tile,
                 const uint8_t *map,
                 unsigned map_stride,
                 unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const uint8_t *src_row = map + y0*map_stride;
   const size_t row_bytes = (size_t) w * sizeof(uint32_t);
   unsigned y;

   for (y = 0; y < h; ++y) {
      memcpy(tile, src_row + x0*4, row_bytes);
      tile += w;
      src_row += map_stride;
   }
}
288
289 /**
290 * Load tile z/stencil from the framebuffer surface.
291 * This is a bin command called during bin processing.
292 */
293 void lp_rast_load_zstencil( struct lp_rasterizer *rast,
294 unsigned thread_index,
295 const union lp_rast_cmd_arg arg )
296 {
297 const unsigned x = rast->tasks[thread_index].x;
298 const unsigned y = rast->tasks[thread_index].y;
299 unsigned w = TILE_SIZE;
300 unsigned h = TILE_SIZE;
301
302 if (x + w > rast->state.fb.width)
303 w -= x + w - rast->state.fb.width;
304
305 if (y + h > rast->state.fb.height)
306 h -= y + h - rast->state.fb.height;
307
308 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
309
310 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
311 lp_tile_read_z32(rast->tasks[thread_index].tile.depth,
312 rast->zsbuf_map,
313 rast->zsbuf_transfer->stride,
314 x, y, w, h);
315 }
316
317
318 void lp_rast_set_state( struct lp_rasterizer *rast,
319 unsigned thread_index,
320 const union lp_rast_cmd_arg arg )
321 {
322 const struct lp_rast_state *state = arg.set_state;
323
324 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
325
326 /* just set the current state pointer for this rasterizer */
327 rast->tasks[thread_index].current_state = state;
328 }
329
330
331
332 /* Within a tile:
333 */
334
335 /**
336 * Run the shader on all blocks in a tile. This is used when a tile is
337 * completely contained inside a triangle.
338 * This is a bin command called during bin processing.
339 */
340 void lp_rast_shade_tile( struct lp_rasterizer *rast,
341 unsigned thread_index,
342 const union lp_rast_cmd_arg arg )
343 {
344 /* Set c1,c2,c3 to large values so the in/out test always passes */
345 const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
346 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
347 const unsigned tile_x = rast->tasks[thread_index].x;
348 const unsigned tile_y = rast->tasks[thread_index].y;
349 unsigned x, y;
350
351 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
352
353 /* Use the existing preference for 4x4 (four quads) shading:
354 */
355 for (y = 0; y < TILE_SIZE; y += 4)
356 for (x = 0; x < TILE_SIZE; x += 4)
357 lp_rast_shade_quads( rast,
358 thread_index,
359 inputs,
360 tile_x + x,
361 tile_y + y,
362 c1, c2, c3);
363 }
364
365
366 /**
367 * Compute shading for a 4x4 block of pixels.
368 * This is a bin command called during bin processing.
369 */
370 void lp_rast_shade_quads( struct lp_rasterizer *rast,
371 unsigned thread_index,
372 const struct lp_rast_shader_inputs *inputs,
373 unsigned x, unsigned y,
374 int32_t c1, int32_t c2, int32_t c3)
375 {
376 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
377 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
378 uint8_t *color[PIPE_MAX_COLOR_BUFS];
379 void *depth;
380 unsigned i;
381 unsigned ix, iy;
382 int block_offset;
383
384 #ifdef DEBUG
385 assert(state);
386
387 /* Sanity checks */
388 assert(x % TILE_VECTOR_WIDTH == 0);
389 assert(y % TILE_VECTOR_HEIGHT == 0);
390
391 assert((x % 4) == 0);
392 assert((y % 4) == 0);
393 #endif
394
395 ix = x % TILE_SIZE;
396 iy = y % TILE_SIZE;
397
398 /* offset of the 16x16 pixel block within the tile */
399 block_offset = ((iy/4)*(16*16) + (ix/4)*16);
400
401 /* color buffer */
402 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
403 color[i] = tile->color[i] + 4 * block_offset;
404
405 /* depth buffer */
406 depth = tile->depth + block_offset;
407
408
409
410 #ifdef DEBUG
411 assert(lp_check_alignment(tile->depth, 16));
412 assert(lp_check_alignment(tile->color[0], 16));
413 assert(lp_check_alignment(state->jit_context.blend_color, 16));
414
415 assert(lp_check_alignment(inputs->step[0], 16));
416 assert(lp_check_alignment(inputs->step[1], 16));
417 assert(lp_check_alignment(inputs->step[2], 16));
418 #endif
419
420 /* run shader */
421 state->jit_function( &state->jit_context,
422 x, y,
423 inputs->a0,
424 inputs->dadx,
425 inputs->dady,
426 color,
427 depth,
428 c1, c2, c3,
429 inputs->step[0], inputs->step[1], inputs->step[2]);
430 }
431
432
433 /* End of tile:
434 */
435
436
437 /**
438 * Write the rasterizer's color tile to the framebuffer.
439 */
440 static void lp_rast_store_color( struct lp_rasterizer *rast,
441 unsigned thread_index)
442 {
443 const unsigned x = rast->tasks[thread_index].x;
444 const unsigned y = rast->tasks[thread_index].y;
445 unsigned i;
446
447 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
448 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
449 int w = TILE_SIZE;
450 int h = TILE_SIZE;
451
452 if (x >= transfer->width)
453 continue;
454
455 if (y >= transfer->height)
456 continue;
457
458 /* XXX: require tile-size aligned render target dimensions:
459 */
460 if (x + w > transfer->width)
461 w -= x + w - transfer->width;
462
463 if (y + h > transfer->height)
464 h -= y + h - transfer->height;
465
466 assert(w >= 0);
467 assert(h >= 0);
468 assert(w <= TILE_SIZE);
469 assert(h <= TILE_SIZE);
470
471 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
472 thread_index, x, y, w, h);
473
474 lp_tile_write_4ub(transfer->texture->format,
475 rast->tasks[thread_index].tile.color[i],
476 rast->cbuf_map[i],
477 transfer->stride,
478 x, y,
479 w, h);
480 }
481 }
482
483
/**
 * Copy a w x h rectangle of 32-bit values from a tile buffer into a
 * mapped surface.
 *
 * \param src         source; h rows of w values, packed back to back
 *                    (row pitch is w, not the tile size)
 * \param dst         start of the mapped surface
 * \param dst_stride  distance in bytes between surface rows
 * \param x0, y0      position of the rectangle in the surface, in pixels
 * \param w, h        size of the rectangle, in pixels
 *
 * Rows are moved with memcpy() so that no uint32_t pointer is formed from
 * the byte-typed mapping (no alignment or aliasing assumptions).
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   uint8_t *dst_row = dst + y0*dst_stride;
   const size_t row_bytes = (size_t) w * sizeof(uint32_t);
   unsigned y;

   for (y = 0; y < h; ++y) {
      memcpy(dst_row + x0*4, src, row_bytes);
      src += w;
      dst_row += dst_stride;
   }
}
498
499 /**
500 * Write the rasterizer's z/stencil tile to the framebuffer.
501 */
502 static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
503 unsigned thread_index )
504 {
505 const unsigned x = rast->tasks[thread_index].x;
506 const unsigned y = rast->tasks[thread_index].y;
507 unsigned w = TILE_SIZE;
508 unsigned h = TILE_SIZE;
509
510 if (x + w > rast->state.fb.width)
511 w -= x + w - rast->state.fb.width;
512
513 if (y + h > rast->state.fb.height)
514 h -= y + h - rast->state.fb.height;
515
516 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
517
518 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
519 lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
520 rast->zsbuf_map,
521 rast->zsbuf_transfer->stride,
522 x, y, w, h);
523 }
524
525
526 /**
527 * Write the rasterizer's tiles to the framebuffer.
528 */
529 static void
530 lp_rast_end_tile( struct lp_rasterizer *rast,
531 unsigned thread_index )
532 {
533 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
534
535 if (rast->state.write_color)
536 lp_rast_store_color(rast, thread_index);
537
538 if (rast->state.write_zstencil)
539 lp_rast_store_zstencil(rast, thread_index);
540 }
541
542
543 /**
544 * Signal on a fence. This is called during bin execution/rasterization.
545 * Called per thread.
546 */
547 void lp_rast_fence( struct lp_rasterizer *rast,
548 unsigned thread_index,
549 const union lp_rast_cmd_arg arg )
550 {
551 struct lp_fence *fence = arg.fence;
552
553 pipe_mutex_lock( fence->mutex );
554
555 fence->count++;
556 assert(fence->count <= fence->rank);
557
558 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
559 fence->count, fence->rank);
560
561 pipe_condvar_signal( fence->signalled );
562
563 pipe_mutex_unlock( fence->mutex );
564 }
565
566
567 /**
568 * When all the threads are done rasterizing a scene, one thread will
569 * call this function to reset the scene and put it onto the empty queue.
570 */
571 static void
572 release_scene( struct lp_rasterizer *rast,
573 struct lp_scene *scene )
574 {
575 util_unreference_framebuffer_state( &scene->fb );
576
577 lp_scene_reset( scene );
578 lp_scene_enqueue( rast->empty_scenes, scene );
579 rast->curr_scene = NULL;
580 }
581
582
583 /**
584 * Rasterize commands for a single bin.
585 * \param x, y position of the bin's tile in the framebuffer
586 * Must be called between lp_rast_begin() and lp_rast_end().
587 * Called per thread.
588 */
589 static void
590 rasterize_bin( struct lp_rasterizer *rast,
591 unsigned thread_index,
592 const struct cmd_bin *bin,
593 int x, int y)
594 {
595 const struct cmd_block_list *commands = &bin->commands;
596 struct cmd_block *block;
597 unsigned k;
598
599 lp_rast_start_tile( rast, thread_index, x, y );
600
601 /* simply execute each of the commands in the block list */
602 for (block = commands->head; block; block = block->next) {
603 for (k = 0; k < block->count; k++) {
604 block->cmd[k]( rast, thread_index, block->arg[k] );
605 }
606 }
607
608 lp_rast_end_tile( rast, thread_index );
609 }
610
611
/*
 * Debug aid: maps bin command functions to printable names.
 *
 * RAST(x) expands to the initializer { lp_rast_x, "x" }, i.e. the
 * command function pointer paired with its stringified short name.
 * The table is searched linearly by debug_bin().
 */
#define RAST(x) { lp_rast_##x, #x }

static struct {
   lp_rast_cmd cmd;      /* bin command function */
   const char *name;     /* short name printed for that command */
} cmd_names[] =
{
   RAST(load_color),
   RAST(load_zstencil),
   RAST(clear_color),
   RAST(clear_zstencil),
   RAST(triangle),
   RAST(shade_tile),
   RAST(set_state),
   RAST(fence),
};
628
629 static void
630 debug_bin( const struct cmd_bin *bin )
631 {
632 const struct cmd_block *head = bin->commands.head;
633 int i, j;
634
635 for (i = 0; i < head->count; i++) {
636 debug_printf("%d: ", i);
637 for (j = 0; j < Elements(cmd_names); j++) {
638 if (head->cmd[i] == cmd_names[j].cmd) {
639 debug_printf("%s\n", cmd_names[j].name);
640 break;
641 }
642 }
643 if (j == Elements(cmd_names))
644 debug_printf("...other\n");
645 }
646
647 }
648
649 /* An empty bin is one that just loads the contents of the tile and
650 * stores them again unchanged. This typically happens when bins have
651 * been flushed for some reason in the middle of a frame, or when
652 * incremental updates are being made to a render target.
653 *
654 * Try to avoid doing pointless work in this case.
655 */
656 static boolean
657 is_empty_bin( const struct cmd_bin *bin )
658 {
659 const struct cmd_block *head = bin->commands.head;
660 int i;
661
662 if (0)
663 debug_bin(bin);
664
665 /* We emit at most two load-tile commands at the start of the first
666 * command block. In addition we seem to emit a couple of
667 * set-state commands even in empty bins.
668 *
669 * As a heuristic, if a bin has more than 4 commands, consider it
670 * non-empty.
671 */
672 if (head->next != NULL ||
673 head->count > 4) {
674 return FALSE;
675 }
676
677 for (i = 0; i < head->count; i++)
678 if (head->cmd[i] != lp_rast_load_color &&
679 head->cmd[i] != lp_rast_load_zstencil &&
680 head->cmd[i] != lp_rast_set_state) {
681 return FALSE;
682 }
683
684 return TRUE;
685 }
686
687
688
689 /**
690 * Rasterize/execute all bins within a scene.
691 * Called per thread.
692 */
693 static void
694 rasterize_scene( struct lp_rasterizer *rast,
695 unsigned thread_index,
696 struct lp_scene *scene,
697 bool write_depth )
698 {
699 /* loop over scene bins, rasterize each */
700 #if 0
701 {
702 unsigned i, j;
703 for (i = 0; i < scene->tiles_x; i++) {
704 for (j = 0; j < scene->tiles_y; j++) {
705 struct cmd_bin *bin = lp_get_bin(scene, i, j);
706 rasterize_bin( rast, thread_index,
707 bin, i * TILE_SIZE, j * TILE_SIZE );
708 }
709 }
710 }
711 #else
712 {
713 struct cmd_bin *bin;
714 int x, y;
715
716 assert(scene);
717 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
718 if (!is_empty_bin( bin ))
719 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
720 }
721 }
722 #endif
723 }
724
725
726 /**
727 * Called by setup module when it has something for us to render.
728 */
729 void
730 lp_rasterize_scene( struct lp_rasterizer *rast,
731 struct lp_scene *scene,
732 const struct pipe_framebuffer_state *fb,
733 bool write_depth )
734 {
735 boolean debug = false;
736
737 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
738
739 if (debug) {
740 unsigned x, y;
741 printf("rasterize scene:\n");
742 printf(" data size: %u\n", lp_scene_data_size(scene));
743 for (y = 0; y < scene->tiles_y; y++) {
744 for (x = 0; x < scene->tiles_x; x++) {
745 printf(" bin %u, %u size: %u\n", x, y,
746 lp_scene_bin_size(scene, x, y));
747 }
748 }
749 }
750
751 /* save framebuffer state in the bin */
752 util_copy_framebuffer_state(&scene->fb, fb);
753 scene->write_depth = write_depth;
754
755 if (rast->num_threads == 0) {
756 /* no threading */
757
758 lp_rast_begin( rast, fb,
759 fb->nr_cbufs != 0, /* always write color if cbufs present */
760 fb->zsbuf != NULL && write_depth );
761
762 lp_scene_bin_iter_begin( scene );
763 rasterize_scene( rast, 0, scene, write_depth );
764
765 release_scene( rast, scene );
766
767 lp_rast_end( rast );
768 }
769 else {
770 /* threaded rendering! */
771 unsigned i;
772
773 lp_scene_enqueue( rast->full_scenes, scene );
774
775 /* signal the threads that there's work to do */
776 for (i = 0; i < rast->num_threads; i++) {
777 pipe_semaphore_signal(&rast->tasks[i].work_ready);
778 }
779
780 /* wait for work to complete */
781 for (i = 0; i < rast->num_threads; i++) {
782 pipe_semaphore_wait(&rast->tasks[i].work_done);
783 }
784 }
785
786 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
787 }
788
789
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *  1. wait for work (the task's work_ready semaphore)
 *  2. do work (rasterize bins of the current scene)
 *  3. signal that we're done (the task's work_done semaphore)
 *
 * Thread 0 additionally performs the per-scene setup before the first
 * barrier and the per-scene teardown after the second barrier.
 *
 * \param init_data  pointer to this thread's struct lp_rasterizer_task
 * \return NULL; not reached in practice because the loop has no exit
 */
static void *
thread_func( void *init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   boolean debug = false;

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         const struct pipe_framebuffer_state *fb;
         boolean write_depth;

         rast->curr_scene = lp_scene_dequeue( rast->full_scenes );

         /* start the bin iteration that rasterize_scene() pulls from */
         lp_scene_bin_iter_begin( rast->curr_scene );

         fb = &rast->curr_scene->fb;
         write_depth = rast->curr_scene->write_depth;

         /* NOTE(review): the boolean result of lp_rast_begin() is ignored
          * here - confirm whether a mapping failure needs handling.
          */
         lp_rast_begin( rast, fb,
                        fb->nr_cbufs != 0,
                        fb->zsbuf != NULL && write_depth );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      pipe_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);
      rasterize_scene(rast,
                      task->thread_index,
                      rast->curr_scene,
                      rast->curr_scene->write_depth);

      /* wait for all threads to finish with this scene */
      pipe_barrier_wait( &rast->barrier );

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - release the scene object (this also clears rast->curr_scene)
          *  - unmap the framebuffer surfaces
          */
         release_scene( rast, rast->curr_scene );
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);
      pipe_semaphore_signal(&task->work_done);
   }

   return NULL;
}
863
864
865 /**
866 * Initialize semaphores and spawn the threads.
867 */
868 static void
869 create_rast_threads(struct lp_rasterizer *rast)
870 {
871 unsigned i;
872
873 rast->num_threads = util_cpu_caps.nr_cpus;
874 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
875 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
876
877 /* NOTE: if num_threads is zero, we won't use any threads */
878 for (i = 0; i < rast->num_threads; i++) {
879 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
880 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
881 rast->threads[i] = pipe_thread_create(thread_func,
882 (void *) &rast->tasks[i]);
883 }
884 }
885
886
887
888 /**
889 * Create new lp_rasterizer.
890 * \param empty the queue to put empty scenes on after we've finished
891 * processing them.
892 */
893 struct lp_rasterizer *
894 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
895 {
896 struct lp_rasterizer *rast;
897 unsigned i, cbuf;
898
899 rast = CALLOC_STRUCT(lp_rasterizer);
900 if(!rast)
901 return NULL;
902
903 rast->screen = screen;
904
905 rast->empty_scenes = empty;
906 rast->full_scenes = lp_scene_queue_create();
907
908 for (i = 0; i < Elements(rast->tasks); i++) {
909 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
910 rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
911
912 rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
913 rast->tasks[i].rast = rast;
914 rast->tasks[i].thread_index = i;
915 }
916
917 create_rast_threads(rast);
918
919 /* for synchronizing rasterization threads */
920 pipe_barrier_init( &rast->barrier, rast->num_threads );
921
922 return rast;
923 }
924
925
926 /* Shutdown:
927 */
928 void lp_rast_destroy( struct lp_rasterizer *rast )
929 {
930 unsigned i, cbuf;
931
932 util_unreference_framebuffer_state(&rast->state.fb);
933
934 for (i = 0; i < Elements(rast->tasks); i++) {
935 align_free(rast->tasks[i].tile.depth);
936 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
937 align_free(rast->tasks[i].tile.color[cbuf]);
938 }
939
940 /* for synchronizing rasterization threads */
941 pipe_barrier_destroy( &rast->barrier );
942
943 FREE(rast);
944 }
945
946
947 /** Return number of rasterization threads */
948 unsigned
949 lp_rast_get_num_threads( struct lp_rasterizer *rast )
950 {
951 return rast->num_threads;
952 }