654f4ea48ebeb965e448d2d25f68379d6f89995c
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_rect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_perf.h"
38 #include "lp_query.h"
39 #include "lp_rast.h"
40 #include "lp_rast_priv.h"
41 #include "lp_tile_soa.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "lp_scene.h"
44
45
46 /**
47 * Begin rasterizing a scene.
48 * Called once per scene by one thread.
49 */
50 static void
51 lp_rast_begin( struct lp_rasterizer *rast,
52 struct lp_scene *scene )
53 {
54 const struct pipe_framebuffer_state *fb = &scene->fb;
55 int i;
56
57 rast->curr_scene = scene;
58
59 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
60
61 rast->state.nr_cbufs = scene->fb.nr_cbufs;
62
63 for (i = 0; i < rast->state.nr_cbufs; i++) {
64 struct pipe_surface *cbuf = scene->fb.cbufs[i];
65 llvmpipe_resource_map(cbuf->texture,
66 cbuf->face,
67 cbuf->level,
68 cbuf->zslice,
69 LP_TEX_USAGE_READ_WRITE,
70 LP_TEX_LAYOUT_LINEAR);
71 }
72
73 if (fb->zsbuf) {
74 struct pipe_surface *zsbuf = scene->fb.zsbuf;
75 rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
76 rast->zsbuf.blocksize =
77 util_format_get_blocksize(zsbuf->texture->format);
78
79 rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
80 zsbuf->face,
81 zsbuf->level,
82 zsbuf->zslice,
83 LP_TEX_USAGE_READ_WRITE,
84 LP_TEX_LAYOUT_NONE);
85 }
86
87 lp_scene_bin_iter_begin( scene );
88 }
89
90
91 static void
92 lp_rast_end( struct lp_rasterizer *rast )
93 {
94 struct lp_scene *scene = rast->curr_scene;
95 unsigned i;
96
97 /* Unmap color buffers */
98 for (i = 0; i < rast->state.nr_cbufs; i++) {
99 struct pipe_surface *cbuf = scene->fb.cbufs[i];
100 llvmpipe_resource_unmap(cbuf->texture,
101 cbuf->face,
102 cbuf->level,
103 cbuf->zslice);
104 }
105
106 /* Unmap z/stencil buffer */
107 if (rast->zsbuf.map) {
108 struct pipe_surface *zsbuf = scene->fb.zsbuf;
109 llvmpipe_resource_unmap(zsbuf->texture,
110 zsbuf->face,
111 zsbuf->level,
112 zsbuf->zslice);
113 rast->zsbuf.map = NULL;
114 }
115
116 lp_scene_reset( rast->curr_scene );
117
118 rast->curr_scene = NULL;
119
120 #ifdef DEBUG
121 if (0)
122 debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n",
123 lp_tile_unswizzle_count, lp_tile_swizzle_count);
124 #endif
125 }
126
127
128 /**
129 * Begining rasterization of a tile.
130 * \param x window X position of the tile, in pixels
131 * \param y window Y position of the tile, in pixels
132 */
133 static void
134 lp_rast_tile_begin(struct lp_rasterizer_task *task,
135 unsigned x, unsigned y)
136 {
137 struct lp_rasterizer *rast = task->rast;
138 struct lp_scene *scene = rast->curr_scene;
139 enum lp_texture_usage usage;
140
141 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
142
143 assert(x % TILE_SIZE == 0);
144 assert(y % TILE_SIZE == 0);
145
146 task->x = x;
147 task->y = y;
148
149 /* reset pointers to color tile(s) */
150 memset(task->color_tiles, 0, sizeof(task->color_tiles));
151
152 /* get pointer to depth/stencil tile */
153 {
154 struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf;
155 if (zsbuf) {
156 struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
157
158 if (scene->has_depthstencil_clear)
159 usage = LP_TEX_USAGE_WRITE_ALL;
160 else
161 usage = LP_TEX_USAGE_READ_WRITE;
162
163 /* "prime" the tile: convert data from linear to tiled if necessary
164 * and update the tile's layout info.
165 */
166 (void) llvmpipe_get_texture_tile(lpt,
167 zsbuf->face + zsbuf->zslice,
168 zsbuf->level,
169 usage,
170 x, y);
171 /* Get actual pointer to the tile data. Note that depth/stencil
172 * data is tiled differently than color data.
173 */
174 task->depth_tile = lp_rast_get_depth_block_pointer(task, x, y);
175
176 assert(task->depth_tile);
177 }
178 else {
179 task->depth_tile = NULL;
180 }
181 }
182 }
183
184
185 /**
186 * Clear the rasterizer's current color tile.
187 * This is a bin command called during bin processing.
188 */
189 void
190 lp_rast_clear_color(struct lp_rasterizer_task *task,
191 const union lp_rast_cmd_arg arg)
192 {
193 struct lp_rasterizer *rast = task->rast;
194 const uint8_t *clear_color = arg.clear_color;
195
196 unsigned i;
197
198 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
199 clear_color[0],
200 clear_color[1],
201 clear_color[2],
202 clear_color[3]);
203
204 if (clear_color[0] == clear_color[1] &&
205 clear_color[1] == clear_color[2] &&
206 clear_color[2] == clear_color[3]) {
207 /* clear to grayscale value {x, x, x, x} */
208 for (i = 0; i < rast->state.nr_cbufs; i++) {
209 uint8_t *ptr =
210 lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
211 memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
212 }
213 }
214 else {
215 /* Non-gray color.
216 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
217 * will need to change. It'll be pretty obvious when clearing no longer
218 * works.
219 */
220 const unsigned chunk = TILE_SIZE / 4;
221 for (i = 0; i < rast->state.nr_cbufs; i++) {
222 uint8_t *c =
223 lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
224 unsigned j;
225
226 for (j = 0; j < 4 * TILE_SIZE; j++) {
227 memset(c, clear_color[0], chunk);
228 c += chunk;
229 memset(c, clear_color[1], chunk);
230 c += chunk;
231 memset(c, clear_color[2], chunk);
232 c += chunk;
233 memset(c, clear_color[3], chunk);
234 c += chunk;
235 }
236 }
237 }
238
239 LP_COUNT(nr_color_tile_clear);
240 }
241
242
243 /**
244 * Clear the rasterizer's current z/stencil tile.
245 * This is a bin command called during bin processing.
246 */
247 void
248 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
249 const union lp_rast_cmd_arg arg)
250 {
251 struct lp_rasterizer *rast = task->rast;
252 const struct lp_rast_clearzs *clearzs = arg.clear_zstencil;
253 unsigned clear_value = clearzs->clearzs_value;
254 unsigned clear_mask = clearzs->clearzs_mask;
255 const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
256 const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
257 const unsigned block_size = rast->zsbuf.blocksize;
258 const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT;
259 uint8_t *dst;
260 unsigned i, j;
261
262 LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
263
264 /*
265 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
266 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
267 *
268 * The swizzled depth format is such that the depths for
269 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
270 */
271
272 dst = task->depth_tile;
273
274 switch (block_size) {
275 case 1:
276 memset(dst, (uint8_t) clear_value, height * width);
277 break;
278 case 2:
279 for (i = 0; i < height; i++) {
280 uint16_t *row = (uint16_t *)dst;
281 for (j = 0; j < width; j++)
282 *row++ = (uint16_t) clear_value;
283 dst += dst_stride;
284 }
285 break;
286 case 4:
287 if (clear_mask == 0xffffffff) {
288 for (i = 0; i < height; i++) {
289 uint32_t *row = (uint32_t *)dst;
290 for (j = 0; j < width; j++)
291 *row++ = clear_value;
292 dst += dst_stride;
293 }
294 }
295 else {
296 for (i = 0; i < height; i++) {
297 uint32_t *row = (uint32_t *)dst;
298 for (j = 0; j < width; j++) {
299 uint32_t tmp = ~clear_mask & *row;
300 *row++ = (clear_value & clear_mask) | tmp;
301 }
302 dst += dst_stride;
303 }
304 }
305 break;
306 default:
307 assert(0);
308 break;
309 }
310 }
311
312
/**
 * Load tile color from the framebuffer surface.
 * This is a bin command called during bin processing.
 *
 * NOTE: currently compiled out (#if 0).  The body is kept in step with the
 * rest of this file (llvmpipe_resource naming, tile position taken from the
 * task) so that it builds if it is ever re-enabled.
 */
#if 0
void
lp_rast_load_color(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
{
   struct lp_rasterizer *rast = task->rast;
   struct lp_scene *scene = rast->curr_scene;
   unsigned buf;
   enum lp_texture_usage usage;

   LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, task->x, task->y);

   if (scene->has_color_clear)
      usage = LP_TEX_USAGE_WRITE_ALL;
   else
      usage = LP_TEX_USAGE_READ_WRITE;

   /* Get pointers to color tile(s).
    * This will convert linear data to tiled if needed.
    */
   for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
      struct llvmpipe_resource *lpt;
      assert(cbuf);
      lpt = llvmpipe_resource(cbuf->texture);
      task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
                                                         cbuf->face + cbuf->zslice,
                                                         cbuf->level,
                                                         usage,
                                                         task->x, task->y);
      assert(task->color_tiles[buf]);
   }
}
#endif
350
351
352 /**
353 * Convert the color tile from tiled to linear layout.
354 * This is generally only done when we're flushing the scene just prior to
355 * SwapBuffers. If we didn't do this here, we'd have to convert the entire
356 * tiled color buffer to linear layout in the llvmpipe_texture_unmap()
357 * function. It's better to do it here to take advantage of
358 * threading/parallelism.
359 * This is a bin command which is stored in all bins.
360 */
361 void
362 lp_rast_store_linear_color( struct lp_rasterizer_task *task,
363 const union lp_rast_cmd_arg arg)
364 {
365 struct lp_rasterizer *rast = task->rast;
366 struct lp_scene *scene = rast->curr_scene;
367 unsigned buf;
368
369 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
370 struct pipe_surface *cbuf = scene->fb.cbufs[buf];
371 const unsigned face = cbuf->face, level = cbuf->level;
372 struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
373
374 if (!task->color_tiles[buf])
375 continue;
376
377 llvmpipe_unswizzle_cbuf_tile(lpt,
378 face,
379 level,
380 task->x, task->y,
381 task->color_tiles[buf]);
382 }
383 }
384
385
386
387 /**
388 * Run the shader on all blocks in a tile. This is used when a tile is
389 * completely contained inside a triangle.
390 * This is a bin command called during bin processing.
391 */
392 void
393 lp_rast_shade_tile(struct lp_rasterizer_task *task,
394 const union lp_rast_cmd_arg arg)
395 {
396 struct lp_rasterizer *rast = task->rast;
397 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
398 const struct lp_rast_state *state = inputs->state;
399 struct lp_fragment_shader_variant *variant = state->variant;
400 const unsigned tile_x = task->x, tile_y = task->y;
401 unsigned x, y;
402
403 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
404
405 /* render the whole 64x64 tile in 4x4 chunks */
406 for (y = 0; y < TILE_SIZE; y += 4){
407 for (x = 0; x < TILE_SIZE; x += 4) {
408 uint8_t *color[PIPE_MAX_COLOR_BUFS];
409 uint32_t *depth;
410 unsigned i;
411
412 /* color buffer */
413 for (i = 0; i < rast->state.nr_cbufs; i++)
414 color[i] = lp_rast_get_color_block_pointer(task, i,
415 tile_x + x, tile_y + y);
416
417 /* depth buffer */
418 depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
419
420 /* run shader on 4x4 block */
421 variant->jit_function[RAST_WHOLE]( &state->jit_context,
422 tile_x + x, tile_y + y,
423 inputs->facing,
424 inputs->a0,
425 inputs->dadx,
426 inputs->dady,
427 color,
428 depth,
429 0xffff,
430 &task->vis_counter);
431 }
432 }
433 }
434
435
436 /**
437 * Run the shader on all blocks in a tile. This is used when a tile is
438 * completely contained inside a triangle, and the shader is opaque.
439 * This is a bin command called during bin processing.
440 */
441 void
442 lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
443 const union lp_rast_cmd_arg arg)
444 {
445 struct lp_rasterizer *rast = task->rast;
446 unsigned i;
447
448 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
449
450 /* this will prevent converting the layout from tiled to linear */
451 for (i = 0; i < rast->state.nr_cbufs; i++) {
452 (void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
453 }
454
455 lp_rast_shade_tile(task, arg);
456 }
457
458
459 /**
460 * Compute shading for a 4x4 block of pixels inside a triangle.
461 * This is a bin command called during bin processing.
462 * \param x X position of quad in window coords
463 * \param y Y position of quad in window coords
464 */
465 void
466 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
467 const struct lp_rast_shader_inputs *inputs,
468 unsigned x, unsigned y,
469 unsigned mask)
470 {
471 const struct lp_rast_state *state = inputs->state;
472 struct lp_fragment_shader_variant *variant = state->variant;
473 struct lp_rasterizer *rast = task->rast;
474 uint8_t *color[PIPE_MAX_COLOR_BUFS];
475 void *depth;
476 unsigned i;
477
478 assert(state);
479
480 /* Sanity checks */
481 assert(x % TILE_VECTOR_WIDTH == 0);
482 assert(y % TILE_VECTOR_HEIGHT == 0);
483
484 assert((x % 4) == 0);
485 assert((y % 4) == 0);
486
487 /* color buffer */
488 for (i = 0; i < rast->state.nr_cbufs; i++) {
489 color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
490 assert(lp_check_alignment(color[i], 16));
491 }
492
493 /* depth buffer */
494 depth = lp_rast_get_depth_block_pointer(task, x, y);
495
496
497 assert(lp_check_alignment(state->jit_context.blend_color, 16));
498
499 /* run shader on 4x4 block */
500 variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
501 x, y,
502 inputs->facing,
503 inputs->a0,
504 inputs->dadx,
505 inputs->dady,
506 color,
507 depth,
508 mask,
509 &task->vis_counter);
510 }
511
512
513
514 /**
515 * Set top row and left column of the tile's pixels to white. For debugging.
516 */
517 static void
518 outline_tile(uint8_t *tile)
519 {
520 const uint8_t val = 0xff;
521 unsigned i;
522
523 for (i = 0; i < TILE_SIZE; i++) {
524 TILE_PIXEL(tile, i, 0, 0) = val;
525 TILE_PIXEL(tile, i, 0, 1) = val;
526 TILE_PIXEL(tile, i, 0, 2) = val;
527 TILE_PIXEL(tile, i, 0, 3) = val;
528
529 TILE_PIXEL(tile, 0, i, 0) = val;
530 TILE_PIXEL(tile, 0, i, 1) = val;
531 TILE_PIXEL(tile, 0, i, 2) = val;
532 TILE_PIXEL(tile, 0, i, 3) = val;
533 }
534 }
535
536
537 /**
538 * Draw grid of gray lines at 16-pixel intervals across the tile to
539 * show the sub-tile boundaries. For debugging.
540 */
541 static void
542 outline_subtiles(uint8_t *tile)
543 {
544 const uint8_t val = 0x80;
545 const unsigned step = 16;
546 unsigned i, j;
547
548 for (i = 0; i < TILE_SIZE; i += step) {
549 for (j = 0; j < TILE_SIZE; j++) {
550 TILE_PIXEL(tile, i, j, 0) = val;
551 TILE_PIXEL(tile, i, j, 1) = val;
552 TILE_PIXEL(tile, i, j, 2) = val;
553 TILE_PIXEL(tile, i, j, 3) = val;
554
555 TILE_PIXEL(tile, j, i, 0) = val;
556 TILE_PIXEL(tile, j, i, 1) = val;
557 TILE_PIXEL(tile, j, i, 2) = val;
558 TILE_PIXEL(tile, j, i, 3) = val;
559 }
560 }
561
562 outline_tile(tile);
563 }
564
565
566
567 /**
568 * Called when we're done writing to a color tile.
569 */
570 static void
571 lp_rast_tile_end(struct lp_rasterizer_task *task)
572 {
573 #ifdef DEBUG
574 if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) {
575 struct lp_rasterizer *rast = task->rast;
576 unsigned buf;
577
578 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
579 uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
580 task->x, task->y);
581
582 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
583 outline_subtiles(color);
584 else if (LP_DEBUG & DEBUG_SHOW_TILES)
585 outline_tile(color);
586 }
587 }
588 #else
589 (void) outline_subtiles;
590 #endif
591
592 {
593 union lp_rast_cmd_arg dummy = {0};
594 lp_rast_store_linear_color(task, dummy);
595 }
596
597 /* debug */
598 memset(task->color_tiles, 0, sizeof(task->color_tiles));
599 task->depth_tile = NULL;
600 }
601
602
603
604 /**
605 * Signal on a fence. This is called during bin execution/rasterization.
606 * Called per thread.
607 */
608 void
609 lp_rast_fence(struct lp_rasterizer_task *task,
610 const union lp_rast_cmd_arg arg)
611 {
612 struct lp_fence *fence = arg.fence;
613 lp_fence_signal(fence);
614 }
615
616
617 /**
618 * Begin a new occlusion query.
619 * This is a bin command put in all bins.
620 * Called per thread.
621 */
622 void
623 lp_rast_begin_query(struct lp_rasterizer_task *task,
624 const union lp_rast_cmd_arg arg)
625 {
626 /* Reset the per-task counter */
627 task->vis_counter = 0;
628 }
629
630
631 /**
632 * End the current occlusion query.
633 * This is a bin command put in all bins.
634 * Called per thread.
635 */
636 void
637 lp_rast_end_query(struct lp_rasterizer_task *task,
638 const union lp_rast_cmd_arg arg)
639 {
640 struct llvmpipe_query *pq = arg.query_obj;
641
642 pipe_mutex_lock(pq->mutex);
643 {
644 /* Accumulate the visible fragment counter from this tile in
645 * the query object.
646 */
647 pq->count[task->thread_index] += task->vis_counter;
648
649 /* check if this is the last tile in the scene */
650 pq->tile_count++;
651 if (pq->tile_count == pq->num_tiles) {
652 uint i;
653
654 /* sum the per-thread counters for the query */
655 pq->result = 0;
656 for (i = 0; i < LP_MAX_THREADS; i++) {
657 pq->result += pq->count[i];
658 }
659
660 /* reset counters (in case this query is re-used in the scene) */
661 memset(pq->count, 0, sizeof(pq->count));
662
663 pq->tile_count = 0;
664 pq->binned = FALSE;
665 pq->done = TRUE;
666 }
667 }
668 pipe_mutex_unlock(pq->mutex);
669 }
670
671
672
673 /**
674 * Rasterize commands for a single bin.
675 * \param x, y position of the bin's tile in the framebuffer
676 * Must be called between lp_rast_begin() and lp_rast_end().
677 * Called per thread.
678 */
679 static void
680 rasterize_bin(struct lp_rasterizer_task *task,
681 const struct cmd_bin *bin,
682 int x, int y)
683 {
684 const struct cmd_block_list *commands = &bin->commands;
685 struct cmd_block *block;
686 unsigned k;
687
688 lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE );
689
690 /* simply execute each of the commands in the block list */
691 for (block = commands->head; block; block = block->next) {
692 for (k = 0; k < block->count; k++) {
693 block->cmd[k]( task, block->arg[k] );
694 }
695 }
696
697 lp_rast_tile_end(task);
698
699 /* Free data for this bin.
700 */
701 lp_scene_bin_reset( task->rast->curr_scene, x, y);
702 }
703
704
705 #define RAST(x) { lp_rast_##x, #x }
706
707 static struct {
708 lp_rast_cmd cmd;
709 const char *name;
710 } cmd_names[] =
711 {
712 RAST(clear_color),
713 RAST(clear_zstencil),
714 RAST(triangle_1),
715 RAST(triangle_2),
716 RAST(triangle_3),
717 RAST(triangle_4),
718 RAST(triangle_5),
719 RAST(triangle_6),
720 RAST(triangle_7),
721 RAST(shade_tile),
722 RAST(shade_tile_opaque),
723 RAST(store_linear_color),
724 RAST(fence),
725 RAST(begin_query),
726 RAST(end_query),
727 };
728
729 static void
730 debug_bin( const struct cmd_bin *bin )
731 {
732 const struct cmd_block *head = bin->commands.head;
733 int i, j;
734
735 for (i = 0; i < head->count; i++) {
736 debug_printf("%d: ", i);
737 for (j = 0; j < Elements(cmd_names); j++) {
738 if (head->cmd[i] == cmd_names[j].cmd) {
739 debug_printf("%s\n", cmd_names[j].name);
740 break;
741 }
742 }
743 if (j == Elements(cmd_names))
744 debug_printf("...other\n");
745 }
746
747 }
748
749 /* An empty bin is one that just loads the contents of the tile and
750 * stores them again unchanged. This typically happens when bins have
751 * been flushed for some reason in the middle of a frame, or when
752 * incremental updates are being made to a render target.
753 *
754 * Try to avoid doing pointless work in this case.
755 */
756 static boolean
757 is_empty_bin( const struct cmd_bin *bin )
758 {
759 if (0) debug_bin(bin);
760 return bin->commands.head->count == 0;
761 }
762
763
764
765 /**
766 * Rasterize/execute all bins within a scene.
767 * Called per thread.
768 */
769 static void
770 rasterize_scene(struct lp_rasterizer_task *task,
771 struct lp_scene *scene)
772 {
773 /* loop over scene bins, rasterize each */
774 #if 0
775 {
776 unsigned i, j;
777 for (i = 0; i < scene->tiles_x; i++) {
778 for (j = 0; j < scene->tiles_y; j++) {
779 struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
780 rasterize_bin(task, bin, i, j);
781 }
782 }
783 }
784 #else
785 {
786 struct cmd_bin *bin;
787 int x, y;
788
789 assert(scene);
790 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
791 if (!is_empty_bin( bin ))
792 rasterize_bin(task, bin, x, y);
793 }
794 }
795 #endif
796
797 if (scene->fence) {
798 lp_rast_fence(task, lp_rast_arg_fence(scene->fence));
799 }
800 }
801
802
803 /**
804 * Called by setup module when it has something for us to render.
805 */
806 void
807 lp_rast_queue_scene( struct lp_rasterizer *rast,
808 struct lp_scene *scene)
809 {
810 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
811
812 if (rast->num_threads == 0) {
813 /* no threading */
814
815 lp_rast_begin( rast, scene );
816
817 rasterize_scene( &rast->tasks[0], scene );
818
819 lp_scene_reset( scene );
820
821 lp_rast_end( rast );
822
823 rast->curr_scene = NULL;
824 }
825 else {
826 /* threaded rendering! */
827 unsigned i;
828
829 lp_scene_enqueue( rast->full_scenes, scene );
830
831 /* signal the threads that there's work to do */
832 for (i = 0; i < rast->num_threads; i++) {
833 pipe_semaphore_signal(&rast->tasks[i].work_ready);
834 }
835 }
836
837 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
838 }
839
840
841 void
842 lp_rast_finish( struct lp_rasterizer *rast )
843 {
844 if (rast->num_threads == 0) {
845 /* nothing to do */
846 }
847 else {
848 int i;
849
850 /* wait for work to complete */
851 for (i = 0; i < rast->num_threads; i++) {
852 pipe_semaphore_wait(&rast->tasks[i].work_done);
853 }
854 }
855 }
856
857
858 /**
859 * This is the thread's main entrypoint.
860 * It's a simple loop:
861 * 1. wait for work
862 * 2. do work
863 * 3. signal that we're done
864 */
865 static PIPE_THREAD_ROUTINE( thread_func, init_data )
866 {
867 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
868 struct lp_rasterizer *rast = task->rast;
869 boolean debug = false;
870
871 while (1) {
872 /* wait for work */
873 if (debug)
874 debug_printf("thread %d waiting for work\n", task->thread_index);
875 pipe_semaphore_wait(&task->work_ready);
876
877 if (rast->exit_flag)
878 break;
879
880 if (task->thread_index == 0) {
881 /* thread[0]:
882 * - get next scene to rasterize
883 * - map the framebuffer surfaces
884 */
885 lp_rast_begin( rast,
886 lp_scene_dequeue( rast->full_scenes, TRUE ) );
887 }
888
889 /* Wait for all threads to get here so that threads[1+] don't
890 * get a null rast->curr_scene pointer.
891 */
892 pipe_barrier_wait( &rast->barrier );
893
894 /* do work */
895 if (debug)
896 debug_printf("thread %d doing work\n", task->thread_index);
897
898 rasterize_scene(task,
899 rast->curr_scene);
900
901 /* wait for all threads to finish with this scene */
902 pipe_barrier_wait( &rast->barrier );
903
904 /* XXX: shouldn't be necessary:
905 */
906 if (task->thread_index == 0) {
907 lp_rast_end( rast );
908 }
909
910 /* signal done with work */
911 if (debug)
912 debug_printf("thread %d done working\n", task->thread_index);
913
914 pipe_semaphore_signal(&task->work_done);
915 }
916
917 return NULL;
918 }
919
920
921 /**
922 * Initialize semaphores and spawn the threads.
923 */
924 static void
925 create_rast_threads(struct lp_rasterizer *rast)
926 {
927 unsigned i;
928
929 /* NOTE: if num_threads is zero, we won't use any threads */
930 for (i = 0; i < rast->num_threads; i++) {
931 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
932 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
933 rast->threads[i] = pipe_thread_create(thread_func,
934 (void *) &rast->tasks[i]);
935 }
936 }
937
938
939
940 /**
941 * Create new lp_rasterizer. If num_threads is zero, don't create any
942 * new threads, do rendering synchronously.
943 * \param num_threads number of rasterizer threads to create
944 */
945 struct lp_rasterizer *
946 lp_rast_create( unsigned num_threads )
947 {
948 struct lp_rasterizer *rast;
949 unsigned i;
950
951 rast = CALLOC_STRUCT(lp_rasterizer);
952 if(!rast)
953 return NULL;
954
955 rast->full_scenes = lp_scene_queue_create();
956
957 for (i = 0; i < Elements(rast->tasks); i++) {
958 struct lp_rasterizer_task *task = &rast->tasks[i];
959 task->rast = rast;
960 task->thread_index = i;
961 }
962
963 rast->num_threads = num_threads;
964
965 create_rast_threads(rast);
966
967 /* for synchronizing rasterization threads */
968 pipe_barrier_init( &rast->barrier, rast->num_threads );
969
970 memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
971
972 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
973
974 return rast;
975 }
976
977
978 /* Shutdown:
979 */
980 void lp_rast_destroy( struct lp_rasterizer *rast )
981 {
982 unsigned i;
983
984 /* Set exit_flag and signal each thread's work_ready semaphore.
985 * Each thread will be woken up, notice that the exit_flag is set and
986 * break out of its main loop. The thread will then exit.
987 */
988 rast->exit_flag = TRUE;
989 for (i = 0; i < rast->num_threads; i++) {
990 pipe_semaphore_signal(&rast->tasks[i].work_ready);
991 }
992
993 /* Wait for threads to terminate before cleaning up per-thread data */
994 for (i = 0; i < rast->num_threads; i++) {
995 pipe_thread_wait(rast->threads[i]);
996 }
997
998 /* Clean up per-thread data */
999 for (i = 0; i < rast->num_threads; i++) {
1000 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
1001 pipe_semaphore_destroy(&rast->tasks[i].work_done);
1002 }
1003
1004 /* for synchronizing rasterization threads */
1005 pipe_barrier_destroy( &rast->barrier );
1006
1007 lp_scene_queue_destroy(rast->full_scenes);
1008
1009 FREE(rast);
1010 }
1011
1012
1013 /** Return number of rasterization threads */
1014 unsigned
1015 lp_rast_get_num_threads( struct lp_rasterizer *rast )
1016 {
1017 return rast->num_threads;
1018 }
1019
1020