llvmpipe: adapt to clear interface changes
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_surface.h"
32
33 #include "lp_scene_queue.h"
34 #include "lp_debug.h"
35 #include "lp_fence.h"
36 #include "lp_perf.h"
37 #include "lp_query.h"
38 #include "lp_rast.h"
39 #include "lp_rast_priv.h"
40 #include "lp_tile_soa.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_scene.h"
43
44
45 /**
46 * Begin rasterizing a scene.
47 * Called once per scene by one thread.
48 */
49 static void
50 lp_rast_begin( struct lp_rasterizer *rast,
51 struct lp_scene *scene )
52 {
53 const struct pipe_framebuffer_state *fb = &scene->fb;
54 int i;
55
56 rast->curr_scene = scene;
57
58 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
59
60 rast->state.nr_cbufs = scene->fb.nr_cbufs;
61
62 for (i = 0; i < rast->state.nr_cbufs; i++) {
63 struct pipe_surface *cbuf = scene->fb.cbufs[i];
64 rast->cbuf[i].format = cbuf->texture->format;
65 rast->cbuf[i].tiles_per_row = align(cbuf->width, TILE_SIZE) / TILE_SIZE;
66 rast->cbuf[i].blocksize =
67 util_format_get_blocksize(cbuf->texture->format);
68 rast->cbuf[i].map = llvmpipe_resource_map(cbuf->texture,
69 cbuf->face,
70 cbuf->level,
71 cbuf->zslice,
72 LP_TEX_USAGE_READ_WRITE,
73 LP_TEX_LAYOUT_NONE);
74 }
75
76 if (fb->zsbuf) {
77 struct pipe_surface *zsbuf = scene->fb.zsbuf;
78 rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
79 rast->zsbuf.blocksize =
80 util_format_get_blocksize(zsbuf->texture->format);
81
82 rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
83 zsbuf->face,
84 zsbuf->level,
85 zsbuf->zslice,
86 LP_TEX_USAGE_READ_WRITE,
87 LP_TEX_LAYOUT_NONE);
88 assert(rast->zsbuf.map);
89 }
90
91 lp_scene_bin_iter_begin( scene );
92 }
93
94
95 static void
96 lp_rast_end( struct lp_rasterizer *rast )
97 {
98 struct lp_scene *scene = rast->curr_scene;
99 unsigned i;
100
101 /* Unmap color buffers */
102 for (i = 0; i < rast->state.nr_cbufs; i++) {
103 struct pipe_surface *cbuf = scene->fb.cbufs[i];
104 llvmpipe_resource_unmap(cbuf->texture,
105 cbuf->face,
106 cbuf->level,
107 cbuf->zslice);
108 rast->cbuf[i].map = NULL;
109 }
110
111 /* Unmap z/stencil buffer */
112 if (rast->zsbuf.map) {
113 struct pipe_surface *zsbuf = scene->fb.zsbuf;
114 llvmpipe_resource_unmap(zsbuf->texture,
115 zsbuf->face,
116 zsbuf->level,
117 zsbuf->zslice);
118 rast->zsbuf.map = NULL;
119 }
120
121 lp_scene_reset( rast->curr_scene );
122
123 rast->curr_scene = NULL;
124
125 #ifdef DEBUG
126 if (0)
127 debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n",
128 lp_tile_unswizzle_count, lp_tile_swizzle_count);
129 #endif
130 }
131
132
133 /**
134 * Begining rasterization of a tile.
135 * \param x window X position of the tile, in pixels
136 * \param y window Y position of the tile, in pixels
137 */
138 static void
139 lp_rast_tile_begin(struct lp_rasterizer_task *task,
140 unsigned x, unsigned y)
141 {
142 struct lp_rasterizer *rast = task->rast;
143 struct lp_scene *scene = rast->curr_scene;
144 enum lp_texture_usage usage;
145 unsigned buf;
146
147 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
148
149 assert(x % TILE_SIZE == 0);
150 assert(y % TILE_SIZE == 0);
151
152 task->x = x;
153 task->y = y;
154
155 if (scene->has_color_clear)
156 usage = LP_TEX_USAGE_WRITE_ALL;
157 else
158 usage = LP_TEX_USAGE_READ_WRITE;
159
160 /* get pointers to color tile(s) */
161 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
162 struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
163 struct llvmpipe_resource *lpt;
164 assert(cbuf);
165 lpt = llvmpipe_resource(cbuf->texture);
166 task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
167 cbuf->face + cbuf->zslice,
168 cbuf->level,
169 usage,
170 x, y);
171 assert(task->color_tiles[buf]);
172 }
173
174 /* get pointer to depth/stencil tile */
175 {
176 struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf;
177 if (zsbuf) {
178 struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
179
180 if (scene->has_depthstencil_clear)
181 usage = LP_TEX_USAGE_WRITE_ALL;
182 else
183 usage = LP_TEX_USAGE_READ_WRITE;
184
185 /* "prime" the tile: convert data from linear to tiled if necessary
186 * and update the tile's layout info.
187 */
188 (void) llvmpipe_get_texture_tile(lpt,
189 zsbuf->face + zsbuf->zslice,
190 zsbuf->level,
191 usage,
192 x, y);
193 /* Get actual pointer to the tile data. Note that depth/stencil
194 * data is tiled differently than color data.
195 */
196 task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y);
197
198 assert(task->depth_tile);
199 }
200 else {
201 task->depth_tile = NULL;
202 }
203 }
204 }
205
206
207 /**
208 * Clear the rasterizer's current color tile.
209 * This is a bin command called during bin processing.
210 */
211 void
212 lp_rast_clear_color(struct lp_rasterizer_task *task,
213 const union lp_rast_cmd_arg arg)
214 {
215 struct lp_rasterizer *rast = task->rast;
216 const uint8_t *clear_color = arg.clear_color;
217
218 unsigned i;
219
220 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
221 clear_color[0],
222 clear_color[1],
223 clear_color[2],
224 clear_color[3]);
225
226 if (clear_color[0] == clear_color[1] &&
227 clear_color[1] == clear_color[2] &&
228 clear_color[2] == clear_color[3]) {
229 /* clear to grayscale value {x, x, x, x} */
230 for (i = 0; i < rast->state.nr_cbufs; i++) {
231 uint8_t *ptr = task->color_tiles[i];
232 memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
233 }
234 }
235 else {
236 /* Non-gray color.
237 * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
238 * will need to change. It'll be pretty obvious when clearing no longer
239 * works.
240 */
241 const unsigned chunk = TILE_SIZE / 4;
242 for (i = 0; i < rast->state.nr_cbufs; i++) {
243 uint8_t *c = task->color_tiles[i];
244 unsigned j;
245
246 for (j = 0; j < 4 * TILE_SIZE; j++) {
247 memset(c, clear_color[0], chunk);
248 c += chunk;
249 memset(c, clear_color[1], chunk);
250 c += chunk;
251 memset(c, clear_color[2], chunk);
252 c += chunk;
253 memset(c, clear_color[3], chunk);
254 c += chunk;
255 }
256 }
257 }
258
259 LP_COUNT(nr_color_tile_clear);
260 }
261
262
263 /**
264 * Clear the rasterizer's current z/stencil tile.
265 * This is a bin command called during bin processing.
266 */
267 void
268 lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
269 const union lp_rast_cmd_arg arg)
270 {
271 struct lp_rasterizer *rast = task->rast;
272 const struct lp_rast_clearzs *clearzs = arg.clear_zstencil;
273 unsigned clear_value = clearzs->clearzs_value;
274 unsigned clear_mask = clearzs->clearzs_mask;
275 const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
276 const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
277 const unsigned block_size = rast->zsbuf.blocksize;
278 const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT;
279 uint8_t *dst;
280 unsigned i, j;
281
282 LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
283
284 /*
285 * Clear the aera of the swizzled depth/depth buffer matching this tile, in
286 * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
287 *
288 * The swizzled depth format is such that the depths for
289 * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
290 */
291
292 dst = task->depth_tile;
293
294 assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y));
295
296 switch (block_size) {
297 case 1:
298 memset(dst, (uint8_t) clear_value, height * width);
299 break;
300 case 2:
301 for (i = 0; i < height; i++) {
302 uint16_t *row = (uint16_t *)dst;
303 for (j = 0; j < width; j++)
304 *row++ = (uint16_t) clear_value;
305 dst += dst_stride;
306 }
307 break;
308 case 4:
309 if (clear_mask == 0xffffffff) {
310 for (i = 0; i < height; i++) {
311 uint32_t *row = (uint32_t *)dst;
312 for (j = 0; j < width; j++)
313 *row++ = clear_value;
314 dst += dst_stride;
315 }
316 }
317 else {
318 for (i = 0; i < height; i++) {
319 uint32_t *row = (uint32_t *)dst;
320 for (j = 0; j < width; j++) {
321 uint32_t tmp = ~clear_mask & *row;
322 *row++ = (clear_value & clear_mask) | tmp;
323 }
324 dst += dst_stride;
325 }
326 }
327 break;
328 default:
329 assert(0);
330 break;
331 }
332 }
333
334
/**
 * Load tile color from the framebuffer surface.
 * This is a bin command called during bin processing.
 *
 * NOTE: this function is compiled out (#if 0).  It is kept in a form
 * that matches the names used by lp_rast_tile_begin(), which performs
 * the same color tile lookup.
 */
#if 0
void
lp_rast_load_color(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
{
   struct lp_rasterizer *rast = task->rast;
   struct lp_scene *scene = rast->curr_scene;
   unsigned buf;
   enum lp_texture_usage usage;

   LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, task->x, task->y);

   if (scene->has_color_clear)
      usage = LP_TEX_USAGE_WRITE_ALL;
   else
      usage = LP_TEX_USAGE_READ_WRITE;

   /* Get pointers to color tile(s).
    * This will convert linear data to tiled if needed.
    */
   for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
      struct llvmpipe_resource *lpt;
      assert(cbuf);
      lpt = llvmpipe_resource(cbuf->texture);
      task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
                                                         cbuf->face + cbuf->zslice,
                                                         cbuf->level,
                                                         usage,
                                                         task->x, task->y);
      assert(task->color_tiles[buf]);
   }
}
#endif
372
373
374 /**
375 * Convert the color tile from tiled to linear layout.
376 * This is generally only done when we're flushing the scene just prior to
377 * SwapBuffers. If we didn't do this here, we'd have to convert the entire
378 * tiled color buffer to linear layout in the llvmpipe_texture_unmap()
379 * function. It's better to do it here to take advantage of
380 * threading/parallelism.
381 * This is a bin command which is stored in all bins.
382 */
383 void
384 lp_rast_store_color( struct lp_rasterizer_task *task,
385 const union lp_rast_cmd_arg arg)
386 {
387 struct lp_rasterizer *rast = task->rast;
388 struct lp_scene *scene = rast->curr_scene;
389 unsigned buf;
390
391 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
392 struct pipe_surface *cbuf = scene->fb.cbufs[buf];
393 const unsigned face = cbuf->face, level = cbuf->level;
394 struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
395 /* this will convert the tiled data to linear if needed */
396 (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
397 LP_TEX_USAGE_READ,
398 task->x, task->y);
399 }
400 }
401
402
403 /**
404 * This is a bin command called during bin processing.
405 */
406 void
407 lp_rast_set_state(struct lp_rasterizer_task *task,
408 const union lp_rast_cmd_arg arg)
409 {
410 const struct lp_rast_state *state = arg.set_state;
411
412 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
413
414 /* just set the current state pointer for this rasterizer */
415 task->current_state = state;
416 }
417
418
419 /**
420 * Run the shader on all blocks in a tile. This is used when a tile is
421 * completely contained inside a triangle.
422 * This is a bin command called during bin processing.
423 */
424 void
425 lp_rast_shade_tile(struct lp_rasterizer_task *task,
426 const union lp_rast_cmd_arg arg)
427 {
428 struct lp_rasterizer *rast = task->rast;
429 const struct lp_rast_state *state = task->current_state;
430 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
431 const unsigned tile_x = task->x, tile_y = task->y;
432 unsigned x, y;
433
434 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
435
436 /* render the whole 64x64 tile in 4x4 chunks */
437 for (y = 0; y < TILE_SIZE; y += 4){
438 for (x = 0; x < TILE_SIZE; x += 4) {
439 uint8_t *color[PIPE_MAX_COLOR_BUFS];
440 uint32_t *depth;
441 unsigned i;
442
443 /* color buffer */
444 for (i = 0; i < rast->state.nr_cbufs; i++)
445 color[i] = lp_rast_get_color_block_pointer(task, i,
446 tile_x + x, tile_y + y);
447
448 /* depth buffer */
449 depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y);
450
451 /* run shader on 4x4 block */
452 state->jit_function[RAST_WHOLE]( &state->jit_context,
453 tile_x + x, tile_y + y,
454 inputs->facing,
455 inputs->a0,
456 inputs->dadx,
457 inputs->dady,
458 color,
459 depth,
460 INT_MIN, INT_MIN, INT_MIN,
461 NULL, NULL, NULL, &task->vis_counter);
462 }
463 }
464 }
465
466
467 /**
468 * Compute shading for a 4x4 block of pixels.
469 * This is a bin command called during bin processing.
470 * \param x X position of quad in window coords
471 * \param y Y position of quad in window coords
472 */
473 void lp_rast_shade_quads( struct lp_rasterizer_task *task,
474 const struct lp_rast_shader_inputs *inputs,
475 unsigned x, unsigned y,
476 int32_t c1, int32_t c2, int32_t c3)
477 {
478 const struct lp_rast_state *state = task->current_state;
479 struct lp_rasterizer *rast = task->rast;
480 uint8_t *color[PIPE_MAX_COLOR_BUFS];
481 void *depth;
482 unsigned i;
483
484 assert(state);
485
486 /* Sanity checks */
487 assert(x % TILE_VECTOR_WIDTH == 0);
488 assert(y % TILE_VECTOR_HEIGHT == 0);
489
490 assert((x % 4) == 0);
491 assert((y % 4) == 0);
492
493 /* color buffer */
494 for (i = 0; i < rast->state.nr_cbufs; i++) {
495 color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
496 assert(lp_check_alignment(color[i], 16));
497 }
498
499 /* depth buffer */
500 depth = lp_rast_get_depth_block_pointer(rast, x, y);
501
502
503 assert(lp_check_alignment(state->jit_context.blend_color, 16));
504
505 assert(lp_check_alignment(inputs->step[0], 16));
506 assert(lp_check_alignment(inputs->step[1], 16));
507 assert(lp_check_alignment(inputs->step[2], 16));
508
509 /* run shader on 4x4 block */
510 state->jit_function[RAST_EDGE_TEST]( &state->jit_context,
511 x, y,
512 inputs->facing,
513 inputs->a0,
514 inputs->dadx,
515 inputs->dady,
516 color,
517 depth,
518 c1, c2, c3,
519 inputs->step[0],
520 inputs->step[1],
521 inputs->step[2],
522 &task->vis_counter);
523 }
524
525
526 /**
527 * Set top row and left column of the tile's pixels to white. For debugging.
528 */
529 static void
530 outline_tile(uint8_t *tile)
531 {
532 const uint8_t val = 0xff;
533 unsigned i;
534
535 for (i = 0; i < TILE_SIZE; i++) {
536 TILE_PIXEL(tile, i, 0, 0) = val;
537 TILE_PIXEL(tile, i, 0, 1) = val;
538 TILE_PIXEL(tile, i, 0, 2) = val;
539 TILE_PIXEL(tile, i, 0, 3) = val;
540
541 TILE_PIXEL(tile, 0, i, 0) = val;
542 TILE_PIXEL(tile, 0, i, 1) = val;
543 TILE_PIXEL(tile, 0, i, 2) = val;
544 TILE_PIXEL(tile, 0, i, 3) = val;
545 }
546 }
547
548
549 /**
550 * Draw grid of gray lines at 16-pixel intervals across the tile to
551 * show the sub-tile boundaries. For debugging.
552 */
553 static void
554 outline_subtiles(uint8_t *tile)
555 {
556 const uint8_t val = 0x80;
557 const unsigned step = 16;
558 unsigned i, j;
559
560 for (i = 0; i < TILE_SIZE; i += step) {
561 for (j = 0; j < TILE_SIZE; j++) {
562 TILE_PIXEL(tile, i, j, 0) = val;
563 TILE_PIXEL(tile, i, j, 1) = val;
564 TILE_PIXEL(tile, i, j, 2) = val;
565 TILE_PIXEL(tile, i, j, 3) = val;
566
567 TILE_PIXEL(tile, j, i, 0) = val;
568 TILE_PIXEL(tile, j, i, 1) = val;
569 TILE_PIXEL(tile, j, i, 2) = val;
570 TILE_PIXEL(tile, j, i, 3) = val;
571 }
572 }
573
574 outline_tile(tile);
575 }
576
577
578
579 /**
580 * Called when we're done writing to a color tile.
581 */
582 static void
583 lp_rast_tile_end(struct lp_rasterizer_task *task)
584 {
585 #if DEBUG
586 struct lp_rasterizer *rast = task->rast;
587 unsigned buf;
588
589 for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
590 uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
591 task->x, task->y);
592
593 if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
594 outline_subtiles(color);
595 else if (LP_DEBUG & DEBUG_SHOW_TILES)
596 outline_tile(color);
597 }
598 #else
599 (void) outline_subtiles;
600 #endif
601
602 /* debug */
603 memset(task->color_tiles, 0, sizeof(task->color_tiles));
604 task->depth_tile = NULL;
605 }
606
607
608
609 /**
610 * Signal on a fence. This is called during bin execution/rasterization.
611 * Called per thread.
612 */
613 void
614 lp_rast_fence(struct lp_rasterizer_task *task,
615 const union lp_rast_cmd_arg arg)
616 {
617 struct lp_fence *fence = arg.fence;
618 lp_fence_signal(fence);
619 }
620
621
622 /**
623 * Begin a new occlusion query.
624 * This is a bin command put in all bins.
625 * Called per thread.
626 */
627 void
628 lp_rast_begin_query(struct lp_rasterizer_task *task,
629 const union lp_rast_cmd_arg arg)
630 {
631 /* Reset the the per-task counter */
632 task->vis_counter = 0;
633 }
634
635
636 /**
637 * End the current occlusion query.
638 * This is a bin command put in all bins.
639 * Called per thread.
640 */
641 void
642 lp_rast_end_query(struct lp_rasterizer_task *task,
643 const union lp_rast_cmd_arg arg)
644 {
645 struct llvmpipe_query *pq = arg.query_obj;
646
647 pipe_mutex_lock(pq->mutex);
648 {
649 /* Accumulate the visible fragment counter from this tile in
650 * the query object.
651 */
652 pq->count[task->thread_index] += task->vis_counter;
653
654 /* check if this is the last tile in the scene */
655 pq->tile_count++;
656 if (pq->tile_count == pq->num_tiles) {
657 uint i;
658
659 /* sum the per-thread counters for the query */
660 pq->result = 0;
661 for (i = 0; i < LP_MAX_THREADS; i++) {
662 pq->result += pq->count[i];
663 }
664
665 /* reset counters (in case this query is re-used in the scene) */
666 memset(pq->count, 0, sizeof(pq->count));
667
668 pq->tile_count = 0;
669 pq->binned = FALSE;
670 pq->done = TRUE;
671 }
672 }
673 pipe_mutex_unlock(pq->mutex);
674 }
675
676
677
678 /**
679 * Rasterize commands for a single bin.
680 * \param x, y position of the bin's tile in the framebuffer
681 * Must be called between lp_rast_begin() and lp_rast_end().
682 * Called per thread.
683 */
684 static void
685 rasterize_bin(struct lp_rasterizer_task *task,
686 const struct cmd_bin *bin,
687 int x, int y)
688 {
689 const struct cmd_block_list *commands = &bin->commands;
690 struct cmd_block *block;
691 unsigned k;
692
693 lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE );
694
695 /* simply execute each of the commands in the block list */
696 for (block = commands->head; block; block = block->next) {
697 for (k = 0; k < block->count; k++) {
698 block->cmd[k]( task, block->arg[k] );
699 }
700 }
701
702 lp_rast_tile_end(task);
703
704 /* Free data for this bin.
705 */
706 lp_scene_bin_reset( task->rast->curr_scene, x, y);
707 }
708
709
710 #define RAST(x) { lp_rast_##x, #x }
711
712 static struct {
713 lp_rast_cmd cmd;
714 const char *name;
715 } cmd_names[] =
716 {
717 RAST(clear_color),
718 RAST(clear_zstencil),
719 RAST(triangle),
720 RAST(shade_tile),
721 RAST(set_state),
722 RAST(store_color),
723 RAST(fence),
724 RAST(begin_query),
725 RAST(end_query),
726 };
727
728 static void
729 debug_bin( const struct cmd_bin *bin )
730 {
731 const struct cmd_block *head = bin->commands.head;
732 int i, j;
733
734 for (i = 0; i < head->count; i++) {
735 debug_printf("%d: ", i);
736 for (j = 0; j < Elements(cmd_names); j++) {
737 if (head->cmd[i] == cmd_names[j].cmd) {
738 debug_printf("%s\n", cmd_names[j].name);
739 break;
740 }
741 }
742 if (j == Elements(cmd_names))
743 debug_printf("...other\n");
744 }
745
746 }
747
748 /* An empty bin is one that just loads the contents of the tile and
749 * stores them again unchanged. This typically happens when bins have
750 * been flushed for some reason in the middle of a frame, or when
751 * incremental updates are being made to a render target.
752 *
753 * Try to avoid doing pointless work in this case.
754 */
755 static boolean
756 is_empty_bin( const struct cmd_bin *bin )
757 {
758 const struct cmd_block *head = bin->commands.head;
759 int i;
760
761 if (0)
762 debug_bin(bin);
763
764 /* We emit at most two load-tile commands at the start of the first
765 * command block. In addition we seem to emit a couple of
766 * set-state commands even in empty bins.
767 *
768 * As a heuristic, if a bin has more than 4 commands, consider it
769 * non-empty.
770 */
771 if (head->next != NULL ||
772 head->count > 4) {
773 return FALSE;
774 }
775
776 for (i = 0; i < head->count; i++)
777 if (head->cmd[i] != lp_rast_set_state) {
778 return FALSE;
779 }
780
781 return TRUE;
782 }
783
784
785
/**
 * Rasterize/execute all bins within a scene.
 *
 * Bins are pulled one at a time from the scene's bin iterator; bins
 * that is_empty_bin() considers empty are skipped.
 * Called per thread.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   struct cmd_bin *bin;
   int x, y;

   assert(scene);

   for (bin = lp_scene_bin_iter_next(scene, &x, &y);
        bin != NULL;
        bin = lp_scene_bin_iter_next(scene, &x, &y)) {
      if (is_empty_bin( bin ))
         continue;

      rasterize_bin(task, bin, x, y);
   }
}
818
819
820 /**
821 * Called by setup module when it has something for us to render.
822 */
823 void
824 lp_rast_queue_scene( struct lp_rasterizer *rast,
825 struct lp_scene *scene)
826 {
827 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
828
829 if (rast->num_threads == 0) {
830 /* no threading */
831
832 lp_rast_begin( rast, scene );
833
834 rasterize_scene( &rast->tasks[0], scene );
835
836 lp_scene_reset( scene );
837
838 lp_rast_end( rast );
839
840 rast->curr_scene = NULL;
841 }
842 else {
843 /* threaded rendering! */
844 unsigned i;
845
846 lp_scene_enqueue( rast->full_scenes, scene );
847
848 /* signal the threads that there's work to do */
849 for (i = 0; i < rast->num_threads; i++) {
850 pipe_semaphore_signal(&rast->tasks[i].work_ready);
851 }
852 }
853
854 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
855 }
856
857
858 void
859 lp_rast_finish( struct lp_rasterizer *rast )
860 {
861 if (rast->num_threads == 0) {
862 /* nothing to do */
863 }
864 else {
865 int i;
866
867 /* wait for work to complete */
868 for (i = 0; i < rast->num_threads; i++) {
869 pipe_semaphore_wait(&rast->tasks[i].work_done);
870 }
871 }
872 }
873
874
875 /**
876 * This is the thread's main entrypoint.
877 * It's a simple loop:
878 * 1. wait for work
879 * 2. do work
880 * 3. signal that we're done
881 */
882 static PIPE_THREAD_ROUTINE( thread_func, init_data )
883 {
884 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
885 struct lp_rasterizer *rast = task->rast;
886 boolean debug = false;
887
888 while (1) {
889 /* wait for work */
890 if (debug)
891 debug_printf("thread %d waiting for work\n", task->thread_index);
892 pipe_semaphore_wait(&task->work_ready);
893
894 if (rast->exit_flag)
895 break;
896
897 if (task->thread_index == 0) {
898 /* thread[0]:
899 * - get next scene to rasterize
900 * - map the framebuffer surfaces
901 */
902 lp_rast_begin( rast,
903 lp_scene_dequeue( rast->full_scenes, TRUE ) );
904 }
905
906 /* Wait for all threads to get here so that threads[1+] don't
907 * get a null rast->curr_scene pointer.
908 */
909 pipe_barrier_wait( &rast->barrier );
910
911 /* do work */
912 if (debug)
913 debug_printf("thread %d doing work\n", task->thread_index);
914
915 rasterize_scene(task,
916 rast->curr_scene);
917
918 /* wait for all threads to finish with this scene */
919 pipe_barrier_wait( &rast->barrier );
920
921 /* XXX: shouldn't be necessary:
922 */
923 if (task->thread_index == 0) {
924 lp_rast_end( rast );
925 }
926
927 /* signal done with work */
928 if (debug)
929 debug_printf("thread %d done working\n", task->thread_index);
930
931 pipe_semaphore_signal(&task->work_done);
932 }
933
934 return NULL;
935 }
936
937
938 /**
939 * Initialize semaphores and spawn the threads.
940 */
941 static void
942 create_rast_threads(struct lp_rasterizer *rast)
943 {
944 unsigned i;
945
946 /* NOTE: if num_threads is zero, we won't use any threads */
947 for (i = 0; i < rast->num_threads; i++) {
948 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
949 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
950 rast->threads[i] = pipe_thread_create(thread_func,
951 (void *) &rast->tasks[i]);
952 }
953 }
954
955
956
957 /**
958 * Create new lp_rasterizer. If num_threads is zero, don't create any
959 * new threads, do rendering synchronously.
960 * \param num_threads number of rasterizer threads to create
961 */
962 struct lp_rasterizer *
963 lp_rast_create( unsigned num_threads )
964 {
965 struct lp_rasterizer *rast;
966 unsigned i;
967
968 rast = CALLOC_STRUCT(lp_rasterizer);
969 if(!rast)
970 return NULL;
971
972 rast->full_scenes = lp_scene_queue_create();
973
974 for (i = 0; i < Elements(rast->tasks); i++) {
975 struct lp_rasterizer_task *task = &rast->tasks[i];
976 task->rast = rast;
977 task->thread_index = i;
978 }
979
980 rast->num_threads = num_threads;
981
982 create_rast_threads(rast);
983
984 /* for synchronizing rasterization threads */
985 pipe_barrier_init( &rast->barrier, rast->num_threads );
986
987 return rast;
988 }
989
990
991 /* Shutdown:
992 */
993 void lp_rast_destroy( struct lp_rasterizer *rast )
994 {
995 unsigned i;
996
997 /* Set exit_flag and signal each thread's work_ready semaphore.
998 * Each thread will be woken up, notice that the exit_flag is set and
999 * break out of its main loop. The thread will then exit.
1000 */
1001 rast->exit_flag = TRUE;
1002 for (i = 0; i < rast->num_threads; i++) {
1003 pipe_semaphore_signal(&rast->tasks[i].work_ready);
1004 }
1005
1006 /* Wait for threads to terminate before cleaning up per-thread data */
1007 for (i = 0; i < rast->num_threads; i++) {
1008 pipe_thread_wait(rast->threads[i]);
1009 }
1010
1011 /* Clean up per-thread data */
1012 for (i = 0; i < rast->num_threads; i++) {
1013 pipe_semaphore_destroy(&rast->tasks[i].work_ready);
1014 pipe_semaphore_destroy(&rast->tasks[i].work_done);
1015 }
1016
1017 /* for synchronizing rasterization threads */
1018 pipe_barrier_destroy( &rast->barrier );
1019
1020 lp_scene_queue_destroy(rast->full_scenes);
1021
1022 FREE(rast);
1023 }
1024
1025
1026 /** Return number of rasterization threads */
1027 unsigned
1028 lp_rast_get_num_threads( struct lp_rasterizer *rast )
1029 {
1030 return rast->num_threads;
1031 }
1032
1033