llvmpipe: generate two shader variants, one omits triangle in/out testing
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_rast.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
41 #include "lp_scene.h"
42
43
44 /**
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
47 */
48 static boolean
49 lp_rast_begin( struct lp_rasterizer *rast,
50 const struct pipe_framebuffer_state *fb,
51 boolean write_color,
52 boolean write_zstencil )
53 {
54 struct pipe_screen *screen = rast->screen;
55 struct pipe_surface *cbuf, *zsbuf;
56 int i;
57
58 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
59
60 util_copy_framebuffer_state(&rast->state.fb, fb);
61
62 rast->state.write_zstencil = write_zstencil;
63 rast->state.write_color = write_color;
64
65 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
66 fb->height % TILE_SIZE != 0);
67
68
69 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
70 cbuf = rast->state.fb.cbufs[i];
71 if (cbuf) {
72 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
73 cbuf->texture,
74 cbuf->face,
75 cbuf->level,
76 cbuf->zslice,
77 PIPE_TRANSFER_READ_WRITE,
78 0, 0,
79 cbuf->width,
80 cbuf->height);
81 if (!rast->cbuf_transfer[i])
82 goto fail;
83
84 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
85 rast->cbuf_transfer[i]);
86 if (!rast->cbuf_map[i])
87 goto fail;
88 }
89 }
90
91 zsbuf = rast->state.fb.zsbuf;
92 if (zsbuf) {
93 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
94 zsbuf->texture,
95 zsbuf->face,
96 zsbuf->level,
97 zsbuf->zslice,
98 PIPE_TRANSFER_READ_WRITE,
99 0, 0,
100 zsbuf->width,
101 zsbuf->height);
102 if (!rast->zsbuf_transfer)
103 goto fail;
104
105 rast->zsbuf_map = screen->transfer_map(rast->screen,
106 rast->zsbuf_transfer);
107 if (!rast->zsbuf_map)
108 goto fail;
109 }
110
111 return TRUE;
112
113 fail:
114 /* Unmap and release transfers?
115 */
116 return FALSE;
117 }
118
119
120 /**
121 * Finish the rasterization phase.
122 * Unmap framebuffer surfaces.
123 */
124 static void
125 lp_rast_end( struct lp_rasterizer *rast )
126 {
127 struct pipe_screen *screen = rast->screen;
128 unsigned i;
129
130 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
131 if (rast->cbuf_map[i])
132 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
133
134 if (rast->cbuf_transfer[i])
135 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
136
137 rast->cbuf_transfer[i] = NULL;
138 rast->cbuf_map[i] = NULL;
139 }
140
141 if (rast->zsbuf_map)
142 screen->transfer_unmap(screen, rast->zsbuf_transfer);
143
144 if (rast->zsbuf_transfer)
145 screen->tex_transfer_destroy(rast->zsbuf_transfer);
146
147 rast->zsbuf_transfer = NULL;
148 rast->zsbuf_map = NULL;
149 }
150
151
152 /**
153 * Begining rasterization of a tile.
154 * \param x window X position of the tile, in pixels
155 * \param y window Y position of the tile, in pixels
156 */
157 static void
158 lp_rast_start_tile( struct lp_rasterizer *rast,
159 unsigned thread_index,
160 unsigned x, unsigned y )
161 {
162 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
163
164 rast->tasks[thread_index].x = x;
165 rast->tasks[thread_index].y = y;
166 }
167
168
169 /**
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
172 */
173 void lp_rast_clear_color( struct lp_rasterizer *rast,
174 unsigned thread_index,
175 const union lp_rast_cmd_arg arg )
176 {
177 const uint8_t *clear_color = arg.clear_color;
178 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
179 unsigned i;
180
181 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
182 clear_color[0],
183 clear_color[1],
184 clear_color[2],
185 clear_color[3]);
186
187 if (clear_color[0] == clear_color[1] &&
188 clear_color[1] == clear_color[2] &&
189 clear_color[2] == clear_color[3]) {
190 /* clear to grayscale value {x, x, x, x} */
191 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
192 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
193 }
194 }
195 else {
196 /* Non-gray color.
197 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
198 * will need to change. It'll be pretty obvious when clearing no longer
199 * works.
200 */
201 const unsigned chunk = TILE_SIZE / 4;
202 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
203 uint8_t *c = color_tile[i];
204 unsigned j;
205 for (j = 0; j < 4 * TILE_SIZE; j++) {
206 memset(c, clear_color[0], chunk);
207 c += chunk;
208 memset(c, clear_color[1], chunk);
209 c += chunk;
210 memset(c, clear_color[2], chunk);
211 c += chunk;
212 memset(c, clear_color[3], chunk);
213 c += chunk;
214 }
215 assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4);
216 }
217 }
218 }
219
220
221 /**
222 * Clear the rasterizer's current z/stencil tile.
223 * This is a bin command called during bin processing.
224 */
225 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
226 unsigned thread_index,
227 const union lp_rast_cmd_arg arg)
228 {
229 unsigned i;
230 uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
231
232 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
233
234 for (i = 0; i < TILE_SIZE * TILE_SIZE; i++)
235 depth_tile[i] = arg.clear_zstencil;
236 }
237
238
239 /**
240 * Load tile color from the framebuffer surface.
241 * This is a bin command called during bin processing.
242 */
243 void lp_rast_load_color( struct lp_rasterizer *rast,
244 unsigned thread_index,
245 const union lp_rast_cmd_arg arg)
246 {
247 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
248 const unsigned x = task->x;
249 const unsigned y = task->y;
250 unsigned i;
251
252 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
253
254 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
255 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
256 int w = TILE_SIZE;
257 int h = TILE_SIZE;
258
259 if (x >= transfer->width)
260 continue;
261
262 if (y >= transfer->height)
263 continue;
264 /* XXX: require tile-size aligned render target dimensions:
265 */
266 if (x + w > transfer->width)
267 w -= x + w - transfer->width;
268
269 if (y + h > transfer->height)
270 h -= y + h - transfer->height;
271
272 assert(w >= 0);
273 assert(h >= 0);
274 assert(w <= TILE_SIZE);
275 assert(h <= TILE_SIZE);
276
277 lp_tile_read_4ub(transfer->texture->format,
278 rast->tasks[thread_index].tile.color[i],
279 rast->cbuf_map[i],
280 transfer->stride,
281 x, y,
282 w, h);
283 }
284 }
285
286
/**
 * Copy a w x h rectangle of 32-bit values out of a mapped surface into a
 * tile buffer.  The values are stored consecutively in 'tile', row after
 * row (w values per row).
 *
 * \param tile        destination, receives w*h values
 * \param map         start of the mapped surface
 * \param map_stride  distance between surface rows, in bytes
 * \param x0, y0      top-left corner of the rectangle, in pixels
 * \param w, h        size of the rectangle, in pixels
 */
static void
lp_tile_read_z32(uint32_t *tile,
                 const uint8_t *map,
                 unsigned map_stride,
                 unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   const uint8_t *src_row = map + y0*map_stride;
   unsigned row;

   for (row = 0; row < h; row++) {
      /* 4 bytes per pixel */
      const uint32_t *src = (const uint32_t *)(src_row + x0*4);
      unsigned col;

      for (col = 0; col < w; col++) {
         tile[col] = src[col];
      }

      tile += w;
      src_row += map_stride;
   }
}
303
304 /**
305 * Load tile z/stencil from the framebuffer surface.
306 * This is a bin command called during bin processing.
307 */
308 void lp_rast_load_zstencil( struct lp_rasterizer *rast,
309 unsigned thread_index,
310 const union lp_rast_cmd_arg arg )
311 {
312 const unsigned x = rast->tasks[thread_index].x;
313 const unsigned y = rast->tasks[thread_index].y;
314 unsigned w = TILE_SIZE;
315 unsigned h = TILE_SIZE;
316
317 if (x + w > rast->state.fb.width)
318 w -= x + w - rast->state.fb.width;
319
320 if (y + h > rast->state.fb.height)
321 h -= y + h - rast->state.fb.height;
322
323 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
324
325 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
326 lp_tile_read_z32(rast->tasks[thread_index].tile.depth,
327 rast->zsbuf_map,
328 rast->zsbuf_transfer->stride,
329 x, y, w, h);
330 }
331
332
333 void lp_rast_set_state( struct lp_rasterizer *rast,
334 unsigned thread_index,
335 const union lp_rast_cmd_arg arg )
336 {
337 const struct lp_rast_state *state = arg.set_state;
338
339 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
340
341 /* just set the current state pointer for this rasterizer */
342 rast->tasks[thread_index].current_state = state;
343 }
344
345
346
347 /**
348 * Run the shader on all blocks in a tile. This is used when a tile is
349 * completely contained inside a triangle.
350 * This is a bin command called during bin processing.
351 */
352 void lp_rast_shade_tile( struct lp_rasterizer *rast,
353 unsigned thread_index,
354 const union lp_rast_cmd_arg arg )
355 {
356 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
357 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
358 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
359 const unsigned tile_x = rast->tasks[thread_index].x;
360 const unsigned tile_y = rast->tasks[thread_index].y;
361 unsigned x, y;
362
363 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
364
365 /* render the whole 64x64 tile in 4x4 chunks */
366 for (y = 0; y < TILE_SIZE; y += 4){
367 for (x = 0; x < TILE_SIZE; x += 4) {
368 uint8_t *color[PIPE_MAX_COLOR_BUFS];
369 uint32_t *depth;
370 unsigned block_offset, i;
371
372 /* offset of the 16x16 pixel block within the tile */
373 block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16);
374
375 /* color buffer */
376 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
377 color[i] = tile->color[i] + 4 * block_offset;
378
379 /* depth buffer */
380 depth = tile->depth + block_offset;
381
382 /* run shader */
383 state->jit_function[0]( &state->jit_context,
384 tile_x + x, tile_y + y,
385 inputs->a0,
386 inputs->dadx,
387 inputs->dady,
388 color,
389 depth,
390 INT_MIN, INT_MIN, INT_MIN,
391 NULL, NULL, NULL );
392 }
393 }
394 }
395
396
397 /**
398 * Compute shading for a 4x4 block of pixels.
399 * This is a bin command called during bin processing.
400 */
401 void lp_rast_shade_quads( struct lp_rasterizer *rast,
402 unsigned thread_index,
403 const struct lp_rast_shader_inputs *inputs,
404 unsigned x, unsigned y,
405 int32_t c1, int32_t c2, int32_t c3)
406 {
407 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
408 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
409 uint8_t *color[PIPE_MAX_COLOR_BUFS];
410 void *depth;
411 unsigned i;
412 unsigned ix, iy;
413 int block_offset;
414
415 #ifdef DEBUG
416 assert(state);
417
418 /* Sanity checks */
419 assert(x % TILE_VECTOR_WIDTH == 0);
420 assert(y % TILE_VECTOR_HEIGHT == 0);
421
422 assert((x % 4) == 0);
423 assert((y % 4) == 0);
424 #endif
425
426 ix = x % TILE_SIZE;
427 iy = y % TILE_SIZE;
428
429 /* offset of the 16x16 pixel block within the tile */
430 block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16);
431
432 /* color buffer */
433 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
434 color[i] = tile->color[i] + 4 * block_offset;
435
436 /* depth buffer */
437 depth = tile->depth + block_offset;
438
439
440
441 #ifdef DEBUG
442 assert(lp_check_alignment(tile->depth, 16));
443 assert(lp_check_alignment(tile->color[0], 16));
444 assert(lp_check_alignment(state->jit_context.blend_color, 16));
445
446 assert(lp_check_alignment(inputs->step[0], 16));
447 assert(lp_check_alignment(inputs->step[1], 16));
448 assert(lp_check_alignment(inputs->step[2], 16));
449 #endif
450
451 /* run shader */
452 state->jit_function[1]( &state->jit_context,
453 x, y,
454 inputs->a0,
455 inputs->dadx,
456 inputs->dady,
457 color,
458 depth,
459 c1, c2, c3,
460 inputs->step[0], inputs->step[1], inputs->step[2]);
461 }
462
463
464
465
466 /**
467 * Write the rasterizer's color tile to the framebuffer.
468 */
469 static void lp_rast_store_color( struct lp_rasterizer *rast,
470 unsigned thread_index)
471 {
472 const unsigned x = rast->tasks[thread_index].x;
473 const unsigned y = rast->tasks[thread_index].y;
474 unsigned i;
475
476 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
477 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
478 int w = TILE_SIZE;
479 int h = TILE_SIZE;
480
481 if (x >= transfer->width)
482 continue;
483
484 if (y >= transfer->height)
485 continue;
486
487 /* XXX: require tile-size aligned render target dimensions:
488 */
489 if (x + w > transfer->width)
490 w -= x + w - transfer->width;
491
492 if (y + h > transfer->height)
493 h -= y + h - transfer->height;
494
495 assert(w >= 0);
496 assert(h >= 0);
497 assert(w <= TILE_SIZE);
498 assert(h <= TILE_SIZE);
499
500 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
501 thread_index, x, y, w, h);
502
503 lp_tile_write_4ub(transfer->texture->format,
504 rast->tasks[thread_index].tile.color[i],
505 rast->cbuf_map[i],
506 transfer->stride,
507 x, y,
508 w, h);
509 }
510 }
511
512
/**
 * Copy w*h consecutive 32-bit values from a tile buffer into a w x h
 * rectangle of a mapped surface.
 *
 * \param src         source values, stored row after row (w per row)
 * \param dst         start of the mapped surface
 * \param dst_stride  distance between surface rows, in bytes
 * \param x0, y0      top-left corner of the rectangle, in pixels
 * \param w, h        size of the rectangle, in pixels
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   uint8_t *out_row = dst + y0*dst_stride;
   unsigned row;

   for (row = 0; row < h; row++) {
      /* 4 bytes per pixel */
      uint32_t *out = (uint32_t *)(out_row + x0*4);
      unsigned col;

      for (col = 0; col < w; col++) {
         out[col] = src[col];
      }

      src += w;
      out_row += dst_stride;
   }
}
527
528 /**
529 * Write the rasterizer's z/stencil tile to the framebuffer.
530 */
531 static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
532 unsigned thread_index )
533 {
534 const unsigned x = rast->tasks[thread_index].x;
535 const unsigned y = rast->tasks[thread_index].y;
536 unsigned w = TILE_SIZE;
537 unsigned h = TILE_SIZE;
538
539 if (x + w > rast->state.fb.width)
540 w -= x + w - rast->state.fb.width;
541
542 if (y + h > rast->state.fb.height)
543 h -= y + h - rast->state.fb.height;
544
545 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
546
547 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
548 lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
549 rast->zsbuf_map,
550 rast->zsbuf_transfer->stride,
551 x, y, w, h);
552 }
553
554
555 /**
556 * Write the rasterizer's tiles to the framebuffer.
557 */
558 static void
559 lp_rast_end_tile( struct lp_rasterizer *rast,
560 unsigned thread_index )
561 {
562 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
563
564 if (rast->state.write_color)
565 lp_rast_store_color(rast, thread_index);
566
567 if (rast->state.write_zstencil)
568 lp_rast_store_zstencil(rast, thread_index);
569 }
570
571
572 /**
573 * Signal on a fence. This is called during bin execution/rasterization.
574 * Called per thread.
575 */
576 void lp_rast_fence( struct lp_rasterizer *rast,
577 unsigned thread_index,
578 const union lp_rast_cmd_arg arg )
579 {
580 struct lp_fence *fence = arg.fence;
581
582 pipe_mutex_lock( fence->mutex );
583
584 fence->count++;
585 assert(fence->count <= fence->rank);
586
587 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
588 fence->count, fence->rank);
589
590 pipe_condvar_signal( fence->signalled );
591
592 pipe_mutex_unlock( fence->mutex );
593 }
594
595
/**
 * When all the threads are done rasterizing a scene, one thread will
 * call this function to reset the scene and put it onto the empty queue.
 *
 * \param rast   the rasterizer; its curr_scene pointer is cleared
 * \param scene  the scene to release
 */
static void
release_scene( struct lp_rasterizer *rast,
               struct lp_scene *scene )
{
   /* drop the framebuffer state saved in the scene by lp_rasterize_scene() */
   util_unreference_framebuffer_state( &scene->fb );

   /* reset first, then hand the scene over via the empty-scenes queue */
   lp_scene_reset( scene );
   lp_scene_enqueue( rast->empty_scenes, scene );
   rast->curr_scene = NULL;
}
610
611
612 /**
613 * Rasterize commands for a single bin.
614 * \param x, y position of the bin's tile in the framebuffer
615 * Must be called between lp_rast_begin() and lp_rast_end().
616 * Called per thread.
617 */
618 static void
619 rasterize_bin( struct lp_rasterizer *rast,
620 unsigned thread_index,
621 const struct cmd_bin *bin,
622 int x, int y)
623 {
624 const struct cmd_block_list *commands = &bin->commands;
625 struct cmd_block *block;
626 unsigned k;
627
628 lp_rast_start_tile( rast, thread_index, x, y );
629
630 /* simply execute each of the commands in the block list */
631 for (block = commands->head; block; block = block->next) {
632 for (k = 0; k < block->count; k++) {
633 block->cmd[k]( rast, thread_index, block->arg[k] );
634 }
635 }
636
637 lp_rast_end_tile( rast, thread_index );
638 }
639
640
/* Build one table entry: RAST(foo) expands to { lp_rast_foo, "foo" } */
#define RAST(x) { lp_rast_##x, #x }

/**
 * Table pairing bin command functions with printable names.
 * Used by debug_bin() to print the commands of a bin.
 */
static struct {
   lp_rast_cmd cmd;     /* command function */
   const char *name;    /* name without the "lp_rast_" prefix */
} cmd_names[] =
{
   RAST(load_color),
   RAST(load_zstencil),
   RAST(clear_color),
   RAST(clear_zstencil),
   RAST(triangle),
   RAST(shade_tile),
   RAST(set_state),
   RAST(fence),
};
657
658 static void
659 debug_bin( const struct cmd_bin *bin )
660 {
661 const struct cmd_block *head = bin->commands.head;
662 int i, j;
663
664 for (i = 0; i < head->count; i++) {
665 debug_printf("%d: ", i);
666 for (j = 0; j < Elements(cmd_names); j++) {
667 if (head->cmd[i] == cmd_names[j].cmd) {
668 debug_printf("%s\n", cmd_names[j].name);
669 break;
670 }
671 }
672 if (j == Elements(cmd_names))
673 debug_printf("...other\n");
674 }
675
676 }
677
678 /* An empty bin is one that just loads the contents of the tile and
679 * stores them again unchanged. This typically happens when bins have
680 * been flushed for some reason in the middle of a frame, or when
681 * incremental updates are being made to a render target.
682 *
683 * Try to avoid doing pointless work in this case.
684 */
685 static boolean
686 is_empty_bin( const struct cmd_bin *bin )
687 {
688 const struct cmd_block *head = bin->commands.head;
689 int i;
690
691 if (0)
692 debug_bin(bin);
693
694 /* We emit at most two load-tile commands at the start of the first
695 * command block. In addition we seem to emit a couple of
696 * set-state commands even in empty bins.
697 *
698 * As a heuristic, if a bin has more than 4 commands, consider it
699 * non-empty.
700 */
701 if (head->next != NULL ||
702 head->count > 4) {
703 return FALSE;
704 }
705
706 for (i = 0; i < head->count; i++)
707 if (head->cmd[i] != lp_rast_load_color &&
708 head->cmd[i] != lp_rast_load_zstencil &&
709 head->cmd[i] != lp_rast_set_state) {
710 return FALSE;
711 }
712
713 return TRUE;
714 }
715
716
717
718 /**
719 * Rasterize/execute all bins within a scene.
720 * Called per thread.
721 */
722 static void
723 rasterize_scene( struct lp_rasterizer *rast,
724 unsigned thread_index,
725 struct lp_scene *scene,
726 bool write_depth )
727 {
728 /* loop over scene bins, rasterize each */
729 #if 0
730 {
731 unsigned i, j;
732 for (i = 0; i < scene->tiles_x; i++) {
733 for (j = 0; j < scene->tiles_y; j++) {
734 struct cmd_bin *bin = lp_get_bin(scene, i, j);
735 rasterize_bin( rast, thread_index,
736 bin, i * TILE_SIZE, j * TILE_SIZE );
737 }
738 }
739 }
740 #else
741 {
742 struct cmd_bin *bin;
743 int x, y;
744
745 assert(scene);
746 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
747 if (!is_empty_bin( bin ))
748 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
749 }
750 }
751 #endif
752 }
753
754
755 /**
756 * Called by setup module when it has something for us to render.
757 */
758 void
759 lp_rasterize_scene( struct lp_rasterizer *rast,
760 struct lp_scene *scene,
761 const struct pipe_framebuffer_state *fb,
762 bool write_depth )
763 {
764 boolean debug = false;
765
766 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
767
768 if (debug) {
769 unsigned x, y;
770 printf("rasterize scene:\n");
771 printf(" data size: %u\n", lp_scene_data_size(scene));
772 for (y = 0; y < scene->tiles_y; y++) {
773 for (x = 0; x < scene->tiles_x; x++) {
774 printf(" bin %u, %u size: %u\n", x, y,
775 lp_scene_bin_size(scene, x, y));
776 }
777 }
778 }
779
780 /* save framebuffer state in the bin */
781 util_copy_framebuffer_state(&scene->fb, fb);
782 scene->write_depth = write_depth;
783
784 if (rast->num_threads == 0) {
785 /* no threading */
786
787 lp_rast_begin( rast, fb,
788 fb->nr_cbufs != 0, /* always write color if cbufs present */
789 fb->zsbuf != NULL && write_depth );
790
791 lp_scene_bin_iter_begin( scene );
792 rasterize_scene( rast, 0, scene, write_depth );
793
794 release_scene( rast, scene );
795
796 lp_rast_end( rast );
797 }
798 else {
799 /* threaded rendering! */
800 unsigned i;
801
802 lp_scene_enqueue( rast->full_scenes, scene );
803
804 /* signal the threads that there's work to do */
805 for (i = 0; i < rast->num_threads; i++) {
806 pipe_semaphore_signal(&rast->tasks[i].work_ready);
807 }
808
809 /* wait for work to complete */
810 for (i = 0; i < rast->num_threads; i++) {
811 pipe_semaphore_wait(&rast->tasks[i].work_done);
812 }
813 }
814
815 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
816 }
817
818
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work
 *   2. do work
 *   3. signal that we're done
 *
 * \param init_data  pointer to this thread's struct lp_rasterizer_task
 * \return never returns in practice (the loop has no exit); the trailing
 *         'return NULL' only satisfies the function signature.
 */
static void *
thread_func( void *init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   boolean debug = false;

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      /* work_ready is signalled once per thread by lp_rasterize_scene() */
      pipe_semaphore_wait(&task->work_ready);

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         const struct pipe_framebuffer_state *fb;
         boolean write_depth;

         rast->curr_scene = lp_scene_dequeue( rast->full_scenes );

         lp_scene_bin_iter_begin( rast->curr_scene );

         fb = &rast->curr_scene->fb;
         write_depth = rast->curr_scene->write_depth;

         /* NOTE(review): the return value of lp_rast_begin() is ignored */
         lp_rast_begin( rast, fb,
                        fb->nr_cbufs != 0,
                        fb->zsbuf != NULL && write_depth );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      pipe_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);
      /* all threads pull bins from the same scene's bin iterator */
      rasterize_scene(rast,
                      task->thread_index,
                      rast->curr_scene,
                      rast->curr_scene->write_depth);

      /* wait for all threads to finish with this scene */
      pipe_barrier_wait( &rast->barrier );

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - release the scene object
          *  - unmap the framebuffer surfaces
          */
         release_scene( rast, rast->curr_scene );
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);
      /* lp_rasterize_scene() waits on work_done of every thread */
      pipe_semaphore_signal(&task->work_done);
   }

   return NULL;
}
892
893
894 /**
895 * Initialize semaphores and spawn the threads.
896 */
897 static void
898 create_rast_threads(struct lp_rasterizer *rast)
899 {
900 unsigned i;
901
902 rast->num_threads = util_cpu_caps.nr_cpus;
903 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
904 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
905
906 /* NOTE: if num_threads is zero, we won't use any threads */
907 for (i = 0; i < rast->num_threads; i++) {
908 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
909 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
910 rast->threads[i] = pipe_thread_create(thread_func,
911 (void *) &rast->tasks[i]);
912 }
913 }
914
915
916
917 /**
918 * Create new lp_rasterizer.
919 * \param empty the queue to put empty scenes on after we've finished
920 * processing them.
921 */
922 struct lp_rasterizer *
923 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
924 {
925 struct lp_rasterizer *rast;
926 unsigned i, cbuf;
927
928 rast = CALLOC_STRUCT(lp_rasterizer);
929 if(!rast)
930 return NULL;
931
932 rast->screen = screen;
933
934 rast->empty_scenes = empty;
935 rast->full_scenes = lp_scene_queue_create();
936
937 for (i = 0; i < Elements(rast->tasks); i++) {
938 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
939 rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
940
941 rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
942 rast->tasks[i].rast = rast;
943 rast->tasks[i].thread_index = i;
944 }
945
946 create_rast_threads(rast);
947
948 /* for synchronizing rasterization threads */
949 pipe_barrier_init( &rast->barrier, rast->num_threads );
950
951 return rast;
952 }
953
954
955 /* Shutdown:
956 */
957 void lp_rast_destroy( struct lp_rasterizer *rast )
958 {
959 unsigned i, cbuf;
960
961 util_unreference_framebuffer_state(&rast->state.fb);
962
963 for (i = 0; i < Elements(rast->tasks); i++) {
964 align_free(rast->tasks[i].tile.depth);
965 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
966 align_free(rast->tasks[i].tile.color[cbuf]);
967 }
968
969 /* for synchronizing rasterization threads */
970 pipe_barrier_destroy( &rast->barrier );
971
972 FREE(rast);
973 }
974
975
976 /** Return number of rasterization threads */
977 unsigned
978 lp_rast_get_num_threads( struct lp_rasterizer *rast )
979 {
980 return rast->num_threads;
981 }