0130e39fd8c512b93e20252e06c83ba7102d88c7
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_perf.h"
38 #include "lp_query.h"
39 #include "lp_rast.h"
40 #include "lp_rast_priv.h"
41 #include "lp_tile_soa.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "lp_scene.h"
44
45
46 /**
47 * Begin rasterizing a scene.
48 * Called once per scene by one thread.
49 */
50 static void
51 lp_rast_begin( struct lp_rasterizer *rast,
52 struct lp_scene *scene )
53 {
54 const struct pipe_framebuffer_state *fb = &scene->fb;
55 int i;
56
57 rast->curr_scene = scene;
58
59 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
60
61 rast->state.nr_cbufs = scene->fb.nr_cbufs;
62
63 for (i = 0; i < rast->state.nr_cbufs; i++) {
64 struct pipe_surface *cbuf = scene->fb.cbufs[i];
65 llvmpipe_resource_map(cbuf->texture,
66 cbuf->face,
67 cbuf->level,
68 cbuf->zslice,
69 LP_TEX_USAGE_READ_WRITE,
70 LP_TEX_LAYOUT_NONE);
71 }
72
73 if (fb->zsbuf) {
74 struct pipe_surface *zsbuf = scene->fb.zsbuf;
75 rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
76 rast->zsbuf.blocksize =
77 util_format_get_blocksize(zsbuf->texture->format);
78
79 rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
80 zsbuf->face,
81 zsbuf->level,
82 zsbuf->zslice,
83 LP_TEX_USAGE_READ_WRITE,
84 LP_TEX_LAYOUT_NONE);
85 }
86
87 lp_scene_bin_iter_begin( scene );
88 }
89
90
91 static void
92 lp_rast_end( struct lp_rasterizer *rast )
93 {
94 struct lp_scene *scene = rast->curr_scene;
95 unsigned i;
96
97 /* Unmap color buffers */
98 for (i = 0; i < rast->state.nr_cbufs; i++) {
99 struct pipe_surface *cbuf = scene->fb.cbufs[i];
100 llvmpipe_resource_unmap(cbuf->texture,
101 cbuf->face,
102 cbuf->level,
103 cbuf->zslice);
104 }
105
106 /* Unmap z/stencil buffer */
107 if (rast->zsbuf.map) {
108 struct pipe_surface *zsbuf = scene->fb.zsbuf;
109 llvmpipe_resource_unmap(zsbuf->texture,
110 zsbuf->face,
111 zsbuf->level,
112 zsbuf->zslice);
113 rast->zsbuf.map = NULL;
114 }
115
116 lp_scene_reset( rast->curr_scene );
117
118 rast->curr_scene = NULL;
119
120 #ifdef DEBUG
121 if (0)
122 debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n",
123 lp_tile_unswizzle_count, lp_tile_swizzle_count);
124 #endif
125 }
126
127
128 /**
129 * Begining rasterization of a tile.
130 * \param x window X position of the tile, in pixels
131 * \param y window Y position of the tile, in pixels
132 */
133 static void
134 lp_rast_tile_begin(struct lp_rasterizer_task *task,
135 unsigned x, unsigned y)
136 {
137 struct lp_rasterizer *rast = task->rast;
138 struct lp_scene *scene = rast->curr_scene;
139 enum lp_texture_usage usage;
140
141 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
142
143 assert(x % TILE_SIZE == 0);
144 assert(y % TILE_SIZE == 0);
145
146 task->x = x;
147 task->y = y;
148
149 /* reset pointers to color tile(s) */
150 memset(task->color_tiles, 0, sizeof(task->color_tiles));
151
152 /* get pointer to depth/stencil tile */
153 {
154 struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf;
155 if (zsbuf) {
156 struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
157
158 if (scene->has_depthstencil_clear)
159 usage = LP_TEX_USAGE_WRITE_ALL;
160 else
161 usage = LP_TEX_USAGE_READ_WRITE;
162
163 /* "prime" the tile: convert data from linear to tiled if necessary
164 * and update the tile's layout info.
165 */
166 (void) llvmpipe_get_texture_tile(lpt,
167 zsbuf->face + zsbuf->zslice,
168 zsbuf->level,
169 usage,
170 x, y);
171 /* Get actual pointer to the tile data. Note that depth/stencil
172 * data is tiled differently than color data.
173 */
174 task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y);
175
176 assert(task->depth_tile);
177 }
178 else {
179 task->depth_tile = NULL;
180 }
181 }
182 }
183
184
185 /**
186 * Clear the rasterizer's current color tile.
187 * This is a bin command called during bin processing.
188 */
189 void
190 lp_rast_clear_color(struct lp_rasterizer_task *task,
191 const union lp_rast_cmd_arg arg)
192 {
193 struct lp_rasterizer *rast = task->rast;
194 const uint8_t *clear_color = arg.clear_color;
195
196 unsigned i;
197
198 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
199 clear_color[0],
200 clear_color[1],
201 clear_color[2],
202 clear_color[3]);
203
204 if (clear_color[0] == clear_color[1] &&
205 clear_color[1] == clear_color[2] &&
206 clear_color[2] == clear_color[3]) {
207 /* clear to grayscale value {x, x, x, x} */
208 for (i = 0; i < rast->state.nr_cbufs; i++) {
209 uint8_t *ptr =
210 lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
211 memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
212 }
213 }
214 else {
215 /* Non-gray color.
216 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
217 * will need to change. It'll be pretty obvious when clearing no longer
218 * works.
219 */
220 const unsigned chunk = TILE_SIZE / 4;
221 for (i = 0; i < rast->state.nr_cbufs; i++) {
222 uint8_t *c =
223 lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
224 unsigned j;
225
226 for (j = 0; j < 4 * TILE_SIZE; j++) {
227 memset(c, clear_color[0], chunk);
228 c += chunk;
229 memset(c, clear_color[1], chunk);
230 c += chunk;
231 memset(c, clear_color[2], chunk);
232 c += chunk;
233 memset(c, clear_color[3], chunk);
234 c += chunk;
235 }
236 }
237 }
238
239 LP_COUNT(nr_color_tile_clear);
240 }
241
242
243 /**
244 * Clear the rasterizer's current z/stencil tile.
245 * This is a bin command called during bin processing.
246 */
247 void
248 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
249 const union lp_rast_cmd_arg arg)
250 {
251 struct lp_rasterizer *rast = task->rast;
252 const struct lp_rast_clearzs *clearzs = arg.clear_zstencil;
253 unsigned clear_value = clearzs->clearzs_value;
254 unsigned clear_mask = clearzs->clearzs_mask;
255 const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
256 const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
257 const unsigned block_size = rast->zsbuf.blocksize;
258 const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT;
259 uint8_t *dst;
260 unsigned i, j;
261
262 LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
263
264 /*
265 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
266 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
267 *
268 * The swizzled depth format is such that the depths for
269 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
270 */
271
272 dst = task->depth_tile;
273
274 if (lp_is_dummy_tile(dst))
275 return;
276
277 assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y));
278
279 switch (block_size) {
280 case 1:
281 memset(dst, (uint8_t) clear_value, height * width);
282 break;
283 case 2:
284 for (i = 0; i < height; i++) {
285 uint16_t *row = (uint16_t *)dst;
286 for (j = 0; j < width; j++)
287 *row++ = (uint16_t) clear_value;
288 dst += dst_stride;
289 }
290 break;
291 case 4:
292 if (clear_mask == 0xffffffff) {
293 for (i = 0; i < height; i++) {
294 uint32_t *row = (uint32_t *)dst;
295 for (j = 0; j < width; j++)
296 *row++ = clear_value;
297 dst += dst_stride;
298 }
299 }
300 else {
301 for (i = 0; i < height; i++) {
302 uint32_t *row = (uint32_t *)dst;
303 for (j = 0; j < width; j++) {
304 uint32_t tmp = ~clear_mask & *row;
305 *row++ = (clear_value & clear_mask) | tmp;
306 }
307 dst += dst_stride;
308 }
309 }
310 break;
311 default:
312 assert(0);
313 break;
314 }
315 }
316
317
/**
 * Load tile color from the framebuffer surface.
 * This is a bin command called during bin processing.
 *
 * NOTE: this function is compiled out (#if 0).  It is kept in sync with
 * the names used by the live code in this file so that it builds if it
 * is ever re-enabled.
 */
#if 0
void
lp_rast_load_color(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
{
   struct lp_rasterizer *rast = task->rast;
   struct lp_scene *scene = rast->curr_scene;
   unsigned buf;
   enum lp_texture_usage usage;

   LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, task->x, task->y);

   /* A pending color clear overwrites the whole tile. */
   if (scene->has_color_clear)
      usage = LP_TEX_USAGE_WRITE_ALL;
   else
      usage = LP_TEX_USAGE_READ_WRITE;

   /* Get pointers to color tile(s).
    * This will convert linear data to tiled if needed.
    */
   for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
      struct llvmpipe_resource *lpt;
      assert(cbuf);
      lpt = llvmpipe_resource(cbuf->texture);
      task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
                                                         cbuf->face + cbuf->zslice,
                                                         cbuf->level,
                                                         usage,
                                                         task->x, task->y);
      assert(task->color_tiles[buf]);
   }
}
#endif
355
356
357 /**
358 * Convert the color tile from tiled to linear layout.
359 * This is generally only done when we're flushing the scene just prior to
360 * SwapBuffers. If we didn't do this here, we'd have to convert the entire
361 * tiled color buffer to linear layout in the llvmpipe_texture_unmap()
362 * function. It's better to do it here to take advantage of
363 * threading/parallelism.
364 * This is a bin command which is stored in all bins.
365 */
366 void
367 lp_rast_store_linear_color( struct lp_rasterizer_task *task,
368 const union lp_rast_cmd_arg arg)
369 {
370 struct lp_rasterizer *rast = task->rast;
371 struct lp_scene *scene = rast->curr_scene;
372 unsigned buf;
373
374 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
375 struct pipe_surface *cbuf = scene->fb.cbufs[buf];
376 const unsigned face = cbuf->face, level = cbuf->level;
377 struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
378 /* this will convert the tiled data to linear if needed */
379 (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
380 LP_TEX_USAGE_READ,
381 task->x, task->y);
382 }
383 }
384
385
386 /**
387 * This is a bin command called during bin processing.
388 */
389 void
390 lp_rast_set_state(struct lp_rasterizer_task *task,
391 const union lp_rast_cmd_arg arg)
392 {
393 const struct lp_rast_state *state = arg.set_state;
394
395 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
396
397 /* just set the current state pointer for this rasterizer */
398 task->current_state = state;
399 }
400
401
402 /**
403 * Run the shader on all blocks in a tile. This is used when a tile is
404 * completely contained inside a triangle.
405 * This is a bin command called during bin processing.
406 */
407 void
408 lp_rast_shade_tile(struct lp_rasterizer_task *task,
409 const union lp_rast_cmd_arg arg)
410 {
411 struct lp_rasterizer *rast = task->rast;
412 const struct lp_rast_state *state = task->current_state;
413 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
414 struct lp_fragment_shader_variant *variant = state->variant;
415 const unsigned tile_x = task->x, tile_y = task->y;
416 unsigned x, y;
417
418 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
419
420 /* render the whole 64x64 tile in 4x4 chunks */
421 for (y = 0; y < TILE_SIZE; y += 4){
422 for (x = 0; x < TILE_SIZE; x += 4) {
423 uint8_t *color[PIPE_MAX_COLOR_BUFS];
424 uint32_t *depth;
425 unsigned i;
426
427 /* color buffer */
428 for (i = 0; i < rast->state.nr_cbufs; i++)
429 color[i] = lp_rast_get_color_block_pointer(task, i,
430 tile_x + x, tile_y + y);
431
432 /* depth buffer */
433 depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
434
435 /* run shader on 4x4 block */
436 variant->jit_function[RAST_WHOLE]( &state->jit_context,
437 tile_x + x, tile_y + y,
438 inputs->facing,
439 inputs->a0,
440 inputs->dadx,
441 inputs->dady,
442 color,
443 depth,
444 0xffff,
445 &task->vis_counter);
446 }
447 }
448 }
449
450
451 /**
452 * Run the shader on all blocks in a tile. This is used when a tile is
453 * completely contained inside a triangle, and the shader is opaque.
454 * This is a bin command called during bin processing.
455 */
456 void
457 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
458 const union lp_rast_cmd_arg arg)
459 {
460 struct lp_rasterizer *rast = task->rast;
461 unsigned i;
462
463 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
464
465 /* this will prevent converting the layout from tiled to linear */
466 for (i = 0; i < rast->state.nr_cbufs; i++) {
467 (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
468 }
469
470 lp_rast_shade_tile(task, arg);
471 }
472
473
474 /**
475 * Compute shading for a 4x4 block of pixels inside a triangle.
476 * This is a bin command called during bin processing.
477 * \param x X position of quad in window coords
478 * \param y Y position of quad in window coords
479 */
480 void
481 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
482 const struct lp_rast_shader_inputs *inputs,
483 unsigned x, unsigned y,
484 unsigned mask)
485 {
486 const struct lp_rast_state *state = task->current_state;
487 struct lp_fragment_shader_variant *variant = state->variant;
488 struct lp_rasterizer *rast = task->rast;
489 uint8_t *color[PIPE_MAX_COLOR_BUFS];
490 void *depth;
491 unsigned i;
492
493 assert(state);
494
495 /* Sanity checks */
496 assert(x % TILE_VECTOR_WIDTH == 0);
497 assert(y % TILE_VECTOR_HEIGHT == 0);
498
499 assert((x % 4) == 0);
500 assert((y % 4) == 0);
501
502 /* color buffer */
503 for (i = 0; i < rast->state.nr_cbufs; i++) {
504 color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
505 assert(lp_check_alignment(color[i], 16));
506 }
507
508 /* depth buffer */
509 depth = lp_rast_get_depth_block_pointer(task, x, y);
510
511
512 assert(lp_check_alignment(state->jit_context.blend_color, 16));
513
514 /* run shader on 4x4 block */
515 variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
516 x, y,
517 inputs->facing,
518 inputs->a0,
519 inputs->dadx,
520 inputs->dady,
521 color,
522 depth,
523 mask,
524 &task->vis_counter);
525 }
526
527
528
529 /**
530 * Set top row and left column of the tile's pixels to white. For debugging.
531 */
532 static void
533 outline_tile(uint8_t *tile)
534 {
535 const uint8_t val = 0xff;
536 unsigned i;
537
538 for (i = 0; i < TILE_SIZE; i++) {
539 TILE_PIXEL(tile, i, 0, 0) = val;
540 TILE_PIXEL(tile, i, 0, 1) = val;
541 TILE_PIXEL(tile, i, 0, 2) = val;
542 TILE_PIXEL(tile, i, 0, 3) = val;
543
544 TILE_PIXEL(tile, 0, i, 0) = val;
545 TILE_PIXEL(tile, 0, i, 1) = val;
546 TILE_PIXEL(tile, 0, i, 2) = val;
547 TILE_PIXEL(tile, 0, i, 3) = val;
548 }
549 }
550
551
552 /**
553 * Draw grid of gray lines at 16-pixel intervals across the tile to
554 * show the sub-tile boundaries. For debugging.
555 */
556 static void
557 outline_subtiles(uint8_t *tile)
558 {
559 const uint8_t val = 0x80;
560 const unsigned step = 16;
561 unsigned i, j;
562
563 for (i = 0; i < TILE_SIZE; i += step) {
564 for (j = 0; j < TILE_SIZE; j++) {
565 TILE_PIXEL(tile, i, j, 0) = val;
566 TILE_PIXEL(tile, i, j, 1) = val;
567 TILE_PIXEL(tile, i, j, 2) = val;
568 TILE_PIXEL(tile, i, j, 3) = val;
569
570 TILE_PIXEL(tile, j, i, 0) = val;
571 TILE_PIXEL(tile, j, i, 1) = val;
572 TILE_PIXEL(tile, j, i, 2) = val;
573 TILE_PIXEL(tile, j, i, 3) = val;
574 }
575 }
576
577 outline_tile(tile);
578 }
579
580
581
582 /**
583 * Called when we're done writing to a color tile.
584 */
585 static void
586 lp_rast_tile_end(struct lp_rasterizer_task *task)
587 {
588 #ifdef DEBUG
589 if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) {
590 struct lp_rasterizer *rast = task->rast;
591 unsigned buf;
592
593 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
594 uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
595 task->x, task->y);
596
597 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
598 outline_subtiles(color);
599 else if (LP_DEBUG & DEBUG_SHOW_TILES)
600 outline_tile(color);
601 }
602 }
603 #else
604 (void) outline_subtiles;
605 #endif
606
607 /* debug */
608 memset(task->color_tiles, 0, sizeof(task->color_tiles));
609 task->depth_tile = NULL;
610 }
611
612
613
614 /**
615 * Signal on a fence. This is called during bin execution/rasterization.
616 * Called per thread.
617 */
618 void
619 lp_rast_fence(struct lp_rasterizer_task *task,
620 const union lp_rast_cmd_arg arg)
621 {
622 struct lp_fence *fence = arg.fence;
623 lp_fence_signal(fence);
624 }
625
626
627 /**
628 * Begin a new occlusion query.
629 * This is a bin command put in all bins.
630 * Called per thread.
631 */
632 void
633 lp_rast_begin_query(struct lp_rasterizer_task *task,
634 const union lp_rast_cmd_arg arg)
635 {
636 /* Reset the per-task counter */
637 task->vis_counter = 0;
638 }
639
640
641 /**
642 * End the current occlusion query.
643 * This is a bin command put in all bins.
644 * Called per thread.
645 */
646 void
647 lp_rast_end_query(struct lp_rasterizer_task *task,
648 const union lp_rast_cmd_arg arg)
649 {
650 struct llvmpipe_query *pq = arg.query_obj;
651
652 pipe_mutex_lock(pq->mutex);
653 {
654 /* Accumulate the visible fragment counter from this tile in
655 * the query object.
656 */
657 pq->count[task->thread_index] += task->vis_counter;
658
659 /* check if this is the last tile in the scene */
660 pq->tile_count++;
661 if (pq->tile_count == pq->num_tiles) {
662 uint i;
663
664 /* sum the per-thread counters for the query */
665 pq->result = 0;
666 for (i = 0; i < LP_MAX_THREADS; i++) {
667 pq->result += pq->count[i];
668 }
669
670 /* reset counters (in case this query is re-used in the scene) */
671 memset(pq->count, 0, sizeof(pq->count));
672
673 pq->tile_count = 0;
674 pq->binned = FALSE;
675 pq->done = TRUE;
676 }
677 }
678 pipe_mutex_unlock(pq->mutex);
679 }
680
681
682
683 /**
684 * Rasterize commands for a single bin.
685 * \param x, y position of the bin's tile in the framebuffer
686 * Must be called between lp_rast_begin() and lp_rast_end().
687 * Called per thread.
688 */
689 static void
690 rasterize_bin(struct lp_rasterizer_task *task,
691 const struct cmd_bin *bin,
692 int x, int y)
693 {
694 const struct cmd_block_list *commands = &bin->commands;
695 struct cmd_block *block;
696 unsigned k;
697
698 lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE );
699
700 /* simply execute each of the commands in the block list */
701 for (block = commands->head; block; block = block->next) {
702 for (k = 0; k < block->count; k++) {
703 block->cmd[k]( task, block->arg[k] );
704 }
705 }
706
707 lp_rast_tile_end(task);
708
709 /* Free data for this bin.
710 */
711 lp_scene_bin_reset( task->rast->curr_scene, x, y);
712 }
713
714
715 #define RAST(x) { lp_rast_##x, #x }
716
717 static struct {
718 lp_rast_cmd cmd;
719 const char *name;
720 } cmd_names[] =
721 {
722 RAST(clear_color),
723 RAST(clear_zstencil),
724 RAST(triangle_1),
725 RAST(triangle_2),
726 RAST(triangle_3),
727 RAST(triangle_4),
728 RAST(triangle_5),
729 RAST(triangle_6),
730 RAST(triangle_7),
731 RAST(shade_tile),
732 RAST(shade_tile_opaque),
733 RAST(set_state),
734 RAST(store_linear_color),
735 RAST(fence),
736 RAST(begin_query),
737 RAST(end_query),
738 };
739
740 static void
741 debug_bin( const struct cmd_bin *bin )
742 {
743 const struct cmd_block *head = bin->commands.head;
744 int i, j;
745
746 for (i = 0; i < head->count; i++) {
747 debug_printf("%d: ", i);
748 for (j = 0; j < Elements(cmd_names); j++) {
749 if (head->cmd[i] == cmd_names[j].cmd) {
750 debug_printf("%s\n", cmd_names[j].name);
751 break;
752 }
753 }
754 if (j == Elements(cmd_names))
755 debug_printf("...other\n");
756 }
757
758 }
759
760 /* An empty bin is one that just loads the contents of the tile and
761 * stores them again unchanged. This typically happens when bins have
762 * been flushed for some reason in the middle of a frame, or when
763 * incremental updates are being made to a render target.
764 *
765 * Try to avoid doing pointless work in this case.
766 */
767 static boolean
768 is_empty_bin( const struct cmd_bin *bin )
769 {
770 const struct cmd_block *head = bin->commands.head;
771 int i;
772
773 if (0)
774 debug_bin(bin);
775
776 /* We emit at most two load-tile commands at the start of the first
777 * command block. In addition we seem to emit a couple of
778 * set-state commands even in empty bins.
779 *
780 * As a heuristic, if a bin has more than 4 commands, consider it
781 * non-empty.
782 */
783 if (head->next != NULL ||
784 head->count > 4) {
785 return FALSE;
786 }
787
788 for (i = 0; i < head->count; i++)
789 if (head->cmd[i] != lp_rast_set_state &&
790 head->cmd[i] != lp_rast_store_linear_color) {
791 return FALSE;
792 }
793
794 return TRUE;
795 }
796
797
798
799 /**
800 * Rasterize/execute all bins within a scene.
801 * Called per thread.
802 */
803 static void
804 rasterize_scene(struct lp_rasterizer_task *task,
805 struct lp_scene *scene)
806 {
807 /* loop over scene bins, rasterize each */
808 #if 0
809 {
810 unsigned i, j;
811 for (i = 0; i < scene->tiles_x; i++) {
812 for (j = 0; j < scene->tiles_y; j++) {
813 struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
814 rasterize_bin(task, bin, i, j);
815 }
816 }
817 }
818 #else
819 {
820 struct cmd_bin *bin;
821 int x, y;
822
823 assert(scene);
824 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
825 if (!is_empty_bin( bin ))
826 rasterize_bin(task, bin, x, y);
827 }
828 }
829 #endif
830
831 if (scene->fence) {
832 lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
833 }
834 }
835
836
837 /**
838 * Called by setup module when it has something for us to render.
839 */
840 void
841 lp_rast_queue_scene( struct lp_rasterizer *rast,
842 struct lp_scene *scene)
843 {
844 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
845
846 if (rast->num_threads == 0) {
847 /* no threading */
848
849 lp_rast_begin( rast, scene );
850
851 rasterize_scene( &rast->tasks[0], scene );
852
853 lp_scene_reset( scene );
854
855 lp_rast_end( rast );
856
857 rast->curr_scene = NULL;
858 }
859 else {
860 /* threaded rendering! */
861 unsigned i;
862
863 lp_scene_enqueue( rast->full_scenes, scene );
864
865 /* signal the threads that there's work to do */
866 for (i = 0; i < rast->num_threads; i++) {
867 pipe_semaphore_signal(&rast->tasks[i].work_ready);
868 }
869 }
870
871 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
872 }
873
874
875 void
876 lp_rast_finish( struct lp_rasterizer *rast )
877 {
878 if (rast->num_threads == 0) {
879 /* nothing to do */
880 }
881 else {
882 int i;
883
884 /* wait for work to complete */
885 for (i = 0; i < rast->num_threads; i++) {
886 pipe_semaphore_wait(&rast->tasks[i].work_done);
887 }
888 }
889 }
890
891
892 /**
893 * This is the thread's main entrypoint.
894 * It's a simple loop:
895 * 1. wait for work
896 * 2. do work
897 * 3. signal that we're done
898 */
899 static PIPE_THREAD_ROUTINE( thread_func, init_data )
900 {
901 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
902 struct lp_rasterizer *rast = task->rast;
903 boolean debug = false;
904
905 while (1) {
906 /* wait for work */
907 if (debug)
908 debug_printf("thread %d waiting for work\n", task->thread_index);
909 pipe_semaphore_wait(&task->work_ready);
910
911 if (rast->exit_flag)
912 break;
913
914 if (task->thread_index == 0) {
915 /* thread[0]:
916 * - get next scene to rasterize
917 * - map the framebuffer surfaces
918 */
919 lp_rast_begin( rast,
920 lp_scene_dequeue( rast->full_scenes, TRUE ) );
921 }
922
923 /* Wait for all threads to get here so that threads[1+] don't
924 * get a null rast->curr_scene pointer.
925 */
926 pipe_barrier_wait( &rast->barrier );
927
928 /* do work */
929 if (debug)
930 debug_printf("thread %d doing work\n", task->thread_index);
931
932 rasterize_scene(task,
933 rast->curr_scene);
934
935 /* wait for all threads to finish with this scene */
936 pipe_barrier_wait( &rast->barrier );
937
938 /* XXX: shouldn't be necessary:
939 */
940 if (task->thread_index == 0) {
941 lp_rast_end( rast );
942 }
943
944 /* signal done with work */
945 if (debug)
946 debug_printf("thread %d done working\n", task->thread_index);
947
948 pipe_semaphore_signal(&task->work_done);
949 }
950
951 return NULL;
952 }
953
954
955 /**
956 * Initialize semaphores and spawn the threads.
957 */
958 static void
959 create_rast_threads(struct lp_rasterizer *rast)
960 {
961 unsigned i;
962
963 /* NOTE: if num_threads is zero, we won't use any threads */
964 for (i = 0; i < rast->num_threads; i++) {
965 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
966 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
967 rast->threads[i] = pipe_thread_create(thread_func,
968 (void *) &rast->tasks[i]);
969 }
970 }
971
972
973
974 /**
975 * Create new lp_rasterizer. If num_threads is zero, don't create any
976 * new threads, do rendering synchronously.
977 * \param num_threads number of rasterizer threads to create
978 */
979 struct lp_rasterizer *
980 lp_rast_create( unsigned num_threads )
981 {
982 struct lp_rasterizer *rast;
983 unsigned i;
984
985 rast = CALLOC_STRUCT(lp_rasterizer);
986 if(!rast)
987 return NULL;
988
989 rast->full_scenes = lp_scene_queue_create();
990
991 for (i = 0; i < Elements(rast->tasks); i++) {
992 struct lp_rasterizer_task *task = &rast->tasks[i];
993 task->rast = rast;
994 task->thread_index = i;
995 }
996
997 rast->num_threads = num_threads;
998
999 create_rast_threads(rast);
1000
1001 /* for synchronizing rasterization threads */
1002 pipe_barrier_init( &rast->barrier, rast->num_threads );
1003
1004 return rast;
1005 }
1006
1007
1008 /* Shutdown:
1009 */
1010 void lp_rast_destroy( struct lp_rasterizer *rast )
1011 {
1012 unsigned i;
1013
1014 /* Set exit_flag and signal each thread's work_ready semaphore.
1015 * Each thread will be woken up, notice that the exit_flag is set and
1016 * break out of its main loop. The thread will then exit.
1017 */
1018 rast->exit_flag = TRUE;
1019 for (i = 0; i < rast->num_threads; i++) {
1020 pipe_semaphore_signal(&rast->tasks[i].work_ready);
1021 }
1022
1023 /* Wait for threads to terminate before cleaning up per-thread data */
1024 for (i = 0; i < rast->num_threads; i++) {
1025 pipe_thread_wait(rast->threads[i]);
1026 }
1027
1028 /* Clean up per-thread data */
1029 for (i = 0; i < rast->num_threads; i++) {
1030 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
1031 pipe_semaphore_destroy(&rast->tasks[i].work_done);
1032 }
1033
1034 /* for synchronizing rasterization threads */
1035 pipe_barrier_destroy( &rast->barrier );
1036
1037 lp_scene_queue_destroy(rast->full_scenes);
1038
1039 FREE(rast);
1040 }
1041
1042
1043 /** Return number of rasterization threads */
1044 unsigned
1045 lp_rast_get_num_threads( struct lp_rasterizer *rast )
1046 {
1047 return rast->num_threads;
1048 }
1049
1050