llvmpipe: initial mrt support
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <limits.h>
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "util/u_cpu_detect.h"
32 #include "util/u_surface.h"
33
34 #include "lp_scene_queue.h"
35 #include "lp_debug.h"
36 #include "lp_fence.h"
37 #include "lp_rast.h"
38 #include "lp_rast_priv.h"
39 #include "lp_tile_soa.h"
40 #include "lp_bld_debug.h"
41 #include "lp_scene.h"
42
43
44 /**
45 * Begin the rasterization phase.
46 * Map the framebuffer surfaces. Initialize the 'rast' state.
47 */
48 static boolean
49 lp_rast_begin( struct lp_rasterizer *rast,
50 const struct pipe_framebuffer_state *fb,
51 boolean write_color,
52 boolean write_zstencil )
53 {
54 struct pipe_screen *screen = rast->screen;
55 struct pipe_surface *cbuf, *zsbuf;
56 int i;
57
58 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
59
60 util_copy_framebuffer_state(&rast->state.fb, fb);
61
62 rast->state.write_zstencil = write_zstencil;
63 rast->state.write_color = write_color;
64
65 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
66 fb->height % TILE_SIZE != 0);
67
68
69 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
70 cbuf = rast->state.fb.cbufs[i];
71 if (cbuf) {
72 rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen,
73 cbuf->texture,
74 cbuf->face,
75 cbuf->level,
76 cbuf->zslice,
77 PIPE_TRANSFER_READ_WRITE,
78 0, 0,
79 cbuf->width,
80 cbuf->height);
81 if (!rast->cbuf_transfer[i])
82 goto fail;
83
84 rast->cbuf_map[i] = screen->transfer_map(rast->screen,
85 rast->cbuf_transfer[i]);
86 if (!rast->cbuf_map[i])
87 goto fail;
88 }
89 }
90
91 zsbuf = rast->state.fb.zsbuf;
92 if (zsbuf) {
93 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
94 zsbuf->texture,
95 zsbuf->face,
96 zsbuf->level,
97 zsbuf->zslice,
98 PIPE_TRANSFER_READ_WRITE,
99 0, 0,
100 zsbuf->width,
101 zsbuf->height);
102 if (!rast->zsbuf_transfer)
103 goto fail;
104
105 rast->zsbuf_map = screen->transfer_map(rast->screen,
106 rast->zsbuf_transfer);
107 if (!rast->zsbuf_map)
108 goto fail;
109 }
110
111 return TRUE;
112
113 fail:
114 /* Unmap and release transfers?
115 */
116 return FALSE;
117 }
118
119
120 /**
121 * Finish the rasterization phase.
122 * Unmap framebuffer surfaces.
123 */
124 static void
125 lp_rast_end( struct lp_rasterizer *rast )
126 {
127 struct pipe_screen *screen = rast->screen;
128 unsigned i;
129
130 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
131 if (rast->cbuf_map[i])
132 screen->transfer_unmap(screen, rast->cbuf_transfer[i]);
133
134 if (rast->cbuf_transfer[i])
135 screen->tex_transfer_destroy(rast->cbuf_transfer[i]);
136
137 rast->cbuf_transfer[i] = NULL;
138 rast->cbuf_map[i] = NULL;
139 }
140
141 if (rast->zsbuf_map)
142 screen->transfer_unmap(screen, rast->zsbuf_transfer);
143
144 if (rast->zsbuf_transfer)
145 screen->tex_transfer_destroy(rast->zsbuf_transfer);
146
147 rast->zsbuf_transfer = NULL;
148 rast->zsbuf_map = NULL;
149 }
150
151
152 /**
153 * Begining rasterization of a tile.
154 * \param x window X position of the tile, in pixels
155 * \param y window Y position of the tile, in pixels
156 */
157 static void
158 lp_rast_start_tile( struct lp_rasterizer *rast,
159 unsigned thread_index,
160 unsigned x, unsigned y )
161 {
162 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
163
164 rast->tasks[thread_index].x = x;
165 rast->tasks[thread_index].y = y;
166 }
167
168
169 /**
170 * Clear the rasterizer's current color tile.
171 * This is a bin command called during bin processing.
172 */
173 void lp_rast_clear_color( struct lp_rasterizer *rast,
174 unsigned thread_index,
175 const union lp_rast_cmd_arg arg )
176 {
177 const uint8_t *clear_color = arg.clear_color;
178 uint8_t **color_tile = rast->tasks[thread_index].tile.color;
179 unsigned i;
180
181 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
182 clear_color[0],
183 clear_color[1],
184 clear_color[2],
185 clear_color[3]);
186
187 if (clear_color[0] == clear_color[1] &&
188 clear_color[1] == clear_color[2] &&
189 clear_color[2] == clear_color[3]) {
190 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
191 memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4);
192 }
193 }
194 else {
195 unsigned x, y, chan;
196 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
197 for (y = 0; y < TILE_SIZE; y++)
198 for (x = 0; x < TILE_SIZE; x++)
199 for (chan = 0; chan < 4; ++chan)
200 TILE_PIXEL(color_tile[i], x, y, chan) = clear_color[chan];
201 }
202 }
203
204
205 /**
206 * Clear the rasterizer's current z/stencil tile.
207 * This is a bin command called during bin processing.
208 */
209 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
210 unsigned thread_index,
211 const union lp_rast_cmd_arg arg)
212 {
213 unsigned i, j;
214 uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
215
216 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
217
218 for (i = 0; i < TILE_SIZE; i++)
219 for (j = 0; j < TILE_SIZE; j++)
220 depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil;
221 }
222
223
224 /**
225 * Load tile color from the framebuffer surface.
226 * This is a bin command called during bin processing.
227 */
228 void lp_rast_load_color( struct lp_rasterizer *rast,
229 unsigned thread_index,
230 const union lp_rast_cmd_arg arg)
231 {
232 struct lp_rasterizer_task *task = &rast->tasks[thread_index];
233 const unsigned x = task->x;
234 const unsigned y = task->y;
235 unsigned i;
236
237 LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
238
239 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
240 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
241 int w = TILE_SIZE;
242 int h = TILE_SIZE;
243
244 if (x >= transfer->width)
245 continue;
246
247 if (y >= transfer->height)
248 continue;
249 /* XXX: require tile-size aligned render target dimensions:
250 */
251 if (x + w > transfer->width)
252 w -= x + w - transfer->width;
253
254 if (y + h > transfer->height)
255 h -= y + h - transfer->height;
256
257 assert(w >= 0);
258 assert(h >= 0);
259 assert(w <= TILE_SIZE);
260 assert(h <= TILE_SIZE);
261
262 lp_tile_read_4ub(transfer->texture->format,
263 rast->tasks[thread_index].tile.color[i],
264 rast->cbuf_map[i],
265 transfer->stride,
266 x, y,
267 w, h);
268 }
269 }
270
271
272 /**
273 * Load tile z/stencil from the framebuffer surface.
274 * This is a bin command called during bin processing.
275 */
276 void lp_rast_load_zstencil( struct lp_rasterizer *rast,
277 unsigned thread_index,
278 const union lp_rast_cmd_arg arg )
279 {
280 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
281
282 /* call u_tile func to load depth (and stencil?) from surface */
283 }
284
285
286 void lp_rast_set_state( struct lp_rasterizer *rast,
287 unsigned thread_index,
288 const union lp_rast_cmd_arg arg )
289 {
290 const struct lp_rast_state *state = arg.set_state;
291
292 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
293
294 /* just set the current state pointer for this rasterizer */
295 rast->tasks[thread_index].current_state = state;
296 }
297
298
299
300 /* Within a tile:
301 */
302
303 /**
304 * Run the shader on all blocks in a tile. This is used when a tile is
305 * completely contained inside a triangle.
306 * This is a bin command called during bin processing.
307 */
308 void lp_rast_shade_tile( struct lp_rasterizer *rast,
309 unsigned thread_index,
310 const union lp_rast_cmd_arg arg )
311 {
312 /* Set c1,c2,c3 to large values so the in/out test always passes */
313 const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN;
314 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
315 const unsigned tile_x = rast->tasks[thread_index].x;
316 const unsigned tile_y = rast->tasks[thread_index].y;
317 unsigned x, y;
318
319 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
320
321 /* Use the existing preference for 4x4 (four quads) shading:
322 */
323 for (y = 0; y < TILE_SIZE; y += 4)
324 for (x = 0; x < TILE_SIZE; x += 4)
325 lp_rast_shade_quads( rast,
326 thread_index,
327 inputs,
328 tile_x + x,
329 tile_y + y,
330 c1, c2, c3);
331 }
332
333
334 /**
335 * Compute shading for a 4x4 block of pixels.
336 * This is a bin command called during bin processing.
337 */
338 void lp_rast_shade_quads( struct lp_rasterizer *rast,
339 unsigned thread_index,
340 const struct lp_rast_shader_inputs *inputs,
341 unsigned x, unsigned y,
342 int32_t c1, int32_t c2, int32_t c3)
343 {
344 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
345 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
346 uint8_t *color[PIPE_MAX_COLOR_BUFS];
347 void *depth;
348 unsigned i;
349 unsigned ix, iy;
350 int block_offset;
351
352 #ifdef DEBUG
353 assert(state);
354
355 /* Sanity checks */
356 assert(x % TILE_VECTOR_WIDTH == 0);
357 assert(y % TILE_VECTOR_HEIGHT == 0);
358
359 assert((x % 4) == 0);
360 assert((y % 4) == 0);
361 #endif
362
363 ix = x % TILE_SIZE;
364 iy = y % TILE_SIZE;
365
366 /* offset of the 16x16 pixel block within the tile */
367 block_offset = ((iy/4)*(16*16) + (ix/4)*16);
368
369 /* color buffer */
370 for (i = 0; i < rast->state.fb.nr_cbufs; i++)
371 color[i] = tile->color[i] + 4 * block_offset;
372
373 /* depth buffer */
374 depth = tile->depth + block_offset;
375
376
377
378 #ifdef DEBUG
379 assert(lp_check_alignment(tile->depth, 16));
380 assert(lp_check_alignment(tile->color[0], 16));
381 assert(lp_check_alignment(state->jit_context.blend_color, 16));
382
383 assert(lp_check_alignment(inputs->step[0], 16));
384 assert(lp_check_alignment(inputs->step[1], 16));
385 assert(lp_check_alignment(inputs->step[2], 16));
386 #endif
387
388 /* run shader */
389 state->jit_function( &state->jit_context,
390 x, y,
391 inputs->a0,
392 inputs->dadx,
393 inputs->dady,
394 color,
395 depth,
396 c1, c2, c3,
397 inputs->step[0], inputs->step[1], inputs->step[2]);
398 }
399
400
401 /* End of tile:
402 */
403
404
405 /**
406 * Write the rasterizer's color tile to the framebuffer.
407 */
408 static void lp_rast_store_color( struct lp_rasterizer *rast,
409 unsigned thread_index)
410 {
411 const unsigned x = rast->tasks[thread_index].x;
412 const unsigned y = rast->tasks[thread_index].y;
413 unsigned i;
414
415 for (i = 0; i < rast->state.fb.nr_cbufs; i++) {
416 struct pipe_transfer *transfer = rast->cbuf_transfer[i];
417 int w = TILE_SIZE;
418 int h = TILE_SIZE;
419
420 if (x >= transfer->width)
421 continue;
422
423 if (y >= transfer->height)
424 continue;
425
426 /* XXX: require tile-size aligned render target dimensions:
427 */
428 if (x + w > transfer->width)
429 w -= x + w - transfer->width;
430
431 if (y + h > transfer->height)
432 h -= y + h - transfer->height;
433
434 assert(w >= 0);
435 assert(h >= 0);
436 assert(w <= TILE_SIZE);
437 assert(h <= TILE_SIZE);
438
439 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
440 thread_index, x, y, w, h);
441
442 lp_tile_write_4ub(transfer->texture->format,
443 rast->tasks[thread_index].tile.color[i],
444 rast->cbuf_map[i],
445 transfer->stride,
446 x, y,
447 w, h);
448 }
449 }
450
451
/**
 * Copy a w x h rectangle of 32-bit values into a linear destination
 * surface.
 *
 * \param src         source values, read consecutively (w values per row,
 *                    rows packed back to back)
 * \param dst         start of the destination surface
 * \param dst_stride  destination row pitch in bytes
 * \param x0, y0      top-left destination position, in pixels
 * \param w, h        size of the rectangle, in pixels
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   uint8_t *row_base = dst + y0*dst_stride;
   unsigned row, col;

   for (row = 0; row < h; row++, row_base += dst_stride) {
      /* four bytes per destination pixel */
      uint32_t *out = (uint32_t *)(row_base + x0*4);

      for (col = 0; col < w; col++) {
         out[col] = src[row * w + col];
      }
   }
}
466
467 /**
468 * Write the rasterizer's z/stencil tile to the framebuffer.
469 */
470 static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
471 unsigned thread_index )
472 {
473 const unsigned x = rast->tasks[thread_index].x;
474 const unsigned y = rast->tasks[thread_index].y;
475 unsigned w = TILE_SIZE;
476 unsigned h = TILE_SIZE;
477
478 if (x + w > rast->state.fb.width)
479 w -= x + w - rast->state.fb.width;
480
481 if (y + h > rast->state.fb.height)
482 h -= y + h - rast->state.fb.height;
483
484 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
485
486 assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
487 lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
488 rast->zsbuf_map,
489 rast->zsbuf_transfer->stride,
490 x, y, w, h);
491 }
492
493
494 /**
495 * Write the rasterizer's tiles to the framebuffer.
496 */
497 static void
498 lp_rast_end_tile( struct lp_rasterizer *rast,
499 unsigned thread_index )
500 {
501 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
502
503 if (rast->state.write_color)
504 lp_rast_store_color(rast, thread_index);
505
506 if (rast->state.write_zstencil)
507 lp_rast_store_zstencil(rast, thread_index);
508 }
509
510
511 /**
512 * Signal on a fence. This is called during bin execution/rasterization.
513 * Called per thread.
514 */
515 void lp_rast_fence( struct lp_rasterizer *rast,
516 unsigned thread_index,
517 const union lp_rast_cmd_arg arg )
518 {
519 struct lp_fence *fence = arg.fence;
520
521 pipe_mutex_lock( fence->mutex );
522
523 fence->count++;
524 assert(fence->count <= fence->rank);
525
526 LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
527 fence->count, fence->rank);
528
529 pipe_condvar_signal( fence->signalled );
530
531 pipe_mutex_unlock( fence->mutex );
532 }
533
534
535 /**
536 * When all the threads are done rasterizing a scene, one thread will
537 * call this function to reset the scene and put it onto the empty queue.
538 */
539 static void
540 release_scene( struct lp_rasterizer *rast,
541 struct lp_scene *scene )
542 {
543 util_unreference_framebuffer_state( &scene->fb );
544
545 lp_scene_reset( scene );
546 lp_scene_enqueue( rast->empty_scenes, scene );
547 rast->curr_scene = NULL;
548 }
549
550
551 /**
552 * Rasterize commands for a single bin.
553 * \param x, y position of the bin's tile in the framebuffer
554 * Must be called between lp_rast_begin() and lp_rast_end().
555 * Called per thread.
556 */
557 static void
558 rasterize_bin( struct lp_rasterizer *rast,
559 unsigned thread_index,
560 const struct cmd_bin *bin,
561 int x, int y)
562 {
563 const struct cmd_block_list *commands = &bin->commands;
564 struct cmd_block *block;
565 unsigned k;
566
567 lp_rast_start_tile( rast, thread_index, x, y );
568
569 /* simply execute each of the commands in the block list */
570 for (block = commands->head; block; block = block->next) {
571 for (k = 0; k < block->count; k++) {
572 block->cmd[k]( rast, thread_index, block->arg[k] );
573 }
574 }
575
576 lp_rast_end_tile( rast, thread_index );
577 }
578
579
580 /**
581 * Rasterize/execute all bins within a scene.
582 * Called per thread.
583 */
584 static void
585 rasterize_scene( struct lp_rasterizer *rast,
586 unsigned thread_index,
587 struct lp_scene *scene,
588 bool write_depth )
589 {
590 /* loop over scene bins, rasterize each */
591 #if 0
592 {
593 unsigned i, j;
594 for (i = 0; i < scene->tiles_x; i++) {
595 for (j = 0; j < scene->tiles_y; j++) {
596 struct cmd_bin *bin = lp_get_bin(scene, i, j);
597 rasterize_bin( rast, thread_index,
598 bin, i * TILE_SIZE, j * TILE_SIZE );
599 }
600 }
601 }
602 #else
603 {
604 struct cmd_bin *bin;
605 int x, y;
606
607 assert(scene);
608 while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
609 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
610 }
611 }
612 #endif
613 }
614
615
616 /**
617 * Called by setup module when it has something for us to render.
618 */
619 void
620 lp_rasterize_scene( struct lp_rasterizer *rast,
621 struct lp_scene *scene,
622 const struct pipe_framebuffer_state *fb,
623 bool write_depth )
624 {
625 boolean debug = false;
626
627 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
628
629 if (debug) {
630 unsigned x, y;
631 printf("rasterize scene:\n");
632 printf(" data size: %u\n", lp_scene_data_size(scene));
633 for (y = 0; y < scene->tiles_y; y++) {
634 for (x = 0; x < scene->tiles_x; x++) {
635 printf(" bin %u, %u size: %u\n", x, y,
636 lp_scene_bin_size(scene, x, y));
637 }
638 }
639 }
640
641 /* save framebuffer state in the bin */
642 util_copy_framebuffer_state(&scene->fb, fb);
643 scene->write_depth = write_depth;
644
645 if (rast->num_threads == 0) {
646 /* no threading */
647
648 lp_rast_begin( rast, fb,
649 fb->nr_cbufs != 0, /* always write color if cbufs present */
650 fb->zsbuf != NULL && write_depth );
651
652 lp_scene_bin_iter_begin( scene );
653 rasterize_scene( rast, 0, scene, write_depth );
654
655 release_scene( rast, scene );
656
657 lp_rast_end( rast );
658 }
659 else {
660 /* threaded rendering! */
661 unsigned i;
662
663 lp_scene_enqueue( rast->full_scenes, scene );
664
665 /* signal the threads that there's work to do */
666 for (i = 0; i < rast->num_threads; i++) {
667 pipe_semaphore_signal(&rast->tasks[i].work_ready);
668 }
669
670 /* wait for work to complete */
671 for (i = 0; i < rast->num_threads; i++) {
672 pipe_semaphore_wait(&rast->tasks[i].work_done);
673 }
674 }
675
676 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
677 }
678
679
680 /**
681 * This is the thread's main entrypoint.
682 * It's a simple loop:
683 * 1. wait for work
684 * 2. do work
685 * 3. signal that we're done
686 */
687 static void *
688 thread_func( void *init_data )
689 {
690 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
691 struct lp_rasterizer *rast = task->rast;
692 boolean debug = false;
693
694 while (1) {
695 /* wait for work */
696 if (debug)
697 debug_printf("thread %d waiting for work\n", task->thread_index);
698 pipe_semaphore_wait(&task->work_ready);
699
700 if (task->thread_index == 0) {
701 /* thread[0]:
702 * - get next scene to rasterize
703 * - map the framebuffer surfaces
704 */
705 const struct pipe_framebuffer_state *fb;
706 boolean write_depth;
707
708 rast->curr_scene = lp_scene_dequeue( rast->full_scenes );
709
710 lp_scene_bin_iter_begin( rast->curr_scene );
711
712 fb = &rast->curr_scene->fb;
713 write_depth = rast->curr_scene->write_depth;
714
715 lp_rast_begin( rast, fb,
716 fb->nr_cbufs != 0,
717 fb->zsbuf != NULL && write_depth );
718 }
719
720 /* Wait for all threads to get here so that threads[1+] don't
721 * get a null rast->curr_scene pointer.
722 */
723 pipe_barrier_wait( &rast->barrier );
724
725 /* do work */
726 if (debug)
727 debug_printf("thread %d doing work\n", task->thread_index);
728 rasterize_scene(rast,
729 task->thread_index,
730 rast->curr_scene,
731 rast->curr_scene->write_depth);
732
733 /* wait for all threads to finish with this scene */
734 pipe_barrier_wait( &rast->barrier );
735
736 if (task->thread_index == 0) {
737 /* thread[0]:
738 * - release the scene object
739 * - unmap the framebuffer surfaces
740 */
741 release_scene( rast, rast->curr_scene );
742 lp_rast_end( rast );
743 }
744
745 /* signal done with work */
746 if (debug)
747 debug_printf("thread %d done working\n", task->thread_index);
748 pipe_semaphore_signal(&task->work_done);
749 }
750
751 return NULL;
752 }
753
754
755 /**
756 * Initialize semaphores and spawn the threads.
757 */
758 static void
759 create_rast_threads(struct lp_rasterizer *rast)
760 {
761 unsigned i;
762
763 rast->num_threads = util_cpu_caps.nr_cpus;
764 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
765 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
766
767 /* NOTE: if num_threads is zero, we won't use any threads */
768 for (i = 0; i < rast->num_threads; i++) {
769 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
770 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
771 rast->threads[i] = pipe_thread_create(thread_func,
772 (void *) &rast->tasks[i]);
773 }
774 }
775
776
777
778 /**
779 * Create new lp_rasterizer.
780 * \param empty the queue to put empty scenes on after we've finished
781 * processing them.
782 */
783 struct lp_rasterizer *
784 lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty )
785 {
786 struct lp_rasterizer *rast;
787 unsigned i, cbuf;
788
789 rast = CALLOC_STRUCT(lp_rasterizer);
790 if(!rast)
791 return NULL;
792
793 rast->screen = screen;
794
795 rast->empty_scenes = empty;
796 rast->full_scenes = lp_scene_queue_create();
797
798 for (i = 0; i < Elements(rast->tasks); i++) {
799 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
800 rast->tasks[i].tile.color[cbuf] = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
801
802 rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
803 rast->tasks[i].rast = rast;
804 rast->tasks[i].thread_index = i;
805 }
806
807 create_rast_threads(rast);
808
809 /* for synchronizing rasterization threads */
810 pipe_barrier_init( &rast->barrier, rast->num_threads );
811
812 return rast;
813 }
814
815
816 /* Shutdown:
817 */
818 void lp_rast_destroy( struct lp_rasterizer *rast )
819 {
820 unsigned i, cbuf;
821
822 util_unreference_framebuffer_state(&rast->state.fb);
823
824 for (i = 0; i < Elements(rast->tasks); i++) {
825 align_free(rast->tasks[i].tile.depth);
826 for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ )
827 align_free(rast->tasks[i].tile.color[cbuf]);
828 }
829
830 /* for synchronizing rasterization threads */
831 pipe_barrier_destroy( &rast->barrier );
832
833 FREE(rast);
834 }
835
836
837 /** Return number of rasterization threads */
838 unsigned
839 lp_rast_get_num_threads( struct lp_rasterizer *rast )
840 {
841 return rast->num_threads;
842 }