llvmpipe: improve framebuffer/surface code
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_memory.h"
29 #include "util/u_math.h"
30 #include "util/u_cpu_detect.h"
31 #include "util/u_surface.h"
32
33 #include "lp_bin_queue.h"
34 #include "lp_debug.h"
35 #include "lp_state.h"
36 #include "lp_rast.h"
37 #include "lp_rast_priv.h"
38 #include "lp_tile_soa.h"
39 #include "lp_bld_debug.h"
40 #include "lp_bin.h"
41
42
43
44 /**
45 * Called by rasterization threads to get the next chunk of work.
46 * We use a lock to make sure that all the threads get the same bins.
47 */
48 static struct lp_bins *
49 get_next_full_bin( struct lp_rasterizer *rast )
50 {
51 pipe_mutex_lock( rast->get_bin_mutex );
52 if (!rast->curr_bins) {
53 /* this will wait until there's something in the queue */
54 rast->curr_bins = lp_bins_dequeue( rast->full_bins );
55 rast->release_count = 0;
56
57 lp_bin_iter_begin( rast->curr_bins );
58 }
59 pipe_mutex_unlock( rast->get_bin_mutex );
60 return rast->curr_bins;
61 }
62
63
64 /**
65 * Called by rasterization threads after they've finished with
66 * the current bin. When all threads have called this, we reset
67 * the bin and put it into the 'empty bins' queue.
68 */
69 static void
70 release_current_bin( struct lp_rasterizer *rast )
71 {
72 pipe_mutex_lock( rast->get_bin_mutex );
73 rast->release_count++;
74 if (rast->release_count == rast->num_threads) {
75 assert(rast->curr_bins);
76 lp_reset_bins( rast->curr_bins );
77 lp_bins_enqueue( rast->empty_bins, rast->curr_bins );
78 rast->curr_bins = NULL;
79 }
80 pipe_mutex_unlock( rast->get_bin_mutex );
81 }
82
83
84
85 /**
86 * Begin the rasterization phase.
87 * Map the framebuffer surfaces. Initialize the 'rast' state.
88 */
89 static boolean
90 lp_rast_begin( struct lp_rasterizer *rast,
91 const struct pipe_framebuffer_state *fb,
92 boolean write_color,
93 boolean write_zstencil )
94 {
95 struct pipe_screen *screen = rast->screen;
96 struct pipe_surface *cbuf, *zsbuf;
97
98 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
99
100 util_copy_framebuffer_state(&rast->state.fb, fb);
101
102 rast->state.write_zstencil = write_zstencil;
103 rast->state.write_color = write_color;
104
105 rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 ||
106 fb->height % TILE_SIZE != 0);
107
108 /* XXX support multiple color buffers here */
109 cbuf = rast->state.fb.cbufs[0];
110 if (cbuf) {
111 rast->cbuf_transfer = screen->get_tex_transfer(rast->screen,
112 cbuf->texture,
113 cbuf->face,
114 cbuf->level,
115 cbuf->zslice,
116 PIPE_TRANSFER_READ_WRITE,
117 0, 0,
118 fb->width, fb->height);
119 if (!rast->cbuf_transfer)
120 return FALSE;
121
122 rast->cbuf_map = screen->transfer_map(rast->screen,
123 rast->cbuf_transfer);
124 if (!rast->cbuf_map)
125 return FALSE;
126 }
127
128 zsbuf = rast->state.fb.zsbuf;
129 if (zsbuf) {
130 rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen,
131 zsbuf->texture,
132 zsbuf->face,
133 zsbuf->level,
134 zsbuf->zslice,
135 PIPE_TRANSFER_READ_WRITE,
136 0, 0,
137 fb->width, fb->height);
138 if (!rast->zsbuf_transfer)
139 return FALSE;
140
141 rast->zsbuf_map = screen->transfer_map(rast->screen,
142 rast->zsbuf_transfer);
143 if (!rast->zsbuf_map)
144 return FALSE;
145 }
146
147 return TRUE;
148 }
149
150
151 /**
152 * Finish the rasterization phase.
153 * Unmap framebuffer surfaces.
154 */
155 static void
156 lp_rast_end( struct lp_rasterizer *rast )
157 {
158 struct pipe_screen *screen = rast->screen;
159
160 if (rast->cbuf_map)
161 screen->transfer_unmap(screen, rast->cbuf_transfer);
162
163 if (rast->zsbuf_map)
164 screen->transfer_unmap(screen, rast->zsbuf_transfer);
165
166 if (rast->cbuf_transfer)
167 screen->tex_transfer_destroy(rast->cbuf_transfer);
168
169 if (rast->zsbuf_transfer)
170 screen->tex_transfer_destroy(rast->zsbuf_transfer);
171
172 rast->cbuf_transfer = NULL;
173 rast->zsbuf_transfer = NULL;
174 rast->cbuf_map = NULL;
175 rast->zsbuf_map = NULL;
176 }
177
178
179 /**
180 * Begining rasterization of a tile.
181 * \param x window X position of the tile, in pixels
182 * \param y window Y position of the tile, in pixels
183 */
184 static void
185 lp_rast_start_tile( struct lp_rasterizer *rast,
186 unsigned thread_index,
187 unsigned x, unsigned y )
188 {
189 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
190
191 rast->tasks[thread_index].x = x;
192 rast->tasks[thread_index].y = y;
193 }
194
195
196 /**
197 * Clear the rasterizer's current color tile.
198 * This is a bin command called during bin processing.
199 */
200 void lp_rast_clear_color( struct lp_rasterizer *rast,
201 unsigned thread_index,
202 const union lp_rast_cmd_arg arg )
203 {
204 const uint8_t *clear_color = arg.clear_color;
205 uint8_t *color_tile = rast->tasks[thread_index].tile.color;
206
207 LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
208 clear_color[0],
209 clear_color[1],
210 clear_color[2],
211 clear_color[3]);
212
213 if (clear_color[0] == clear_color[1] &&
214 clear_color[1] == clear_color[2] &&
215 clear_color[2] == clear_color[3]) {
216 memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
217 }
218 else {
219 unsigned x, y, chan;
220 for (y = 0; y < TILE_SIZE; y++)
221 for (x = 0; x < TILE_SIZE; x++)
222 for (chan = 0; chan < 4; ++chan)
223 TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan];
224 }
225 }
226
227
228 /**
229 * Clear the rasterizer's current z/stencil tile.
230 * This is a bin command called during bin processing.
231 */
232 void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
233 unsigned thread_index,
234 const union lp_rast_cmd_arg arg)
235 {
236 unsigned i, j;
237 uint32_t *depth_tile = rast->tasks[thread_index].tile.depth;
238
239 LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
240
241 for (i = 0; i < TILE_SIZE; i++)
242 for (j = 0; j < TILE_SIZE; j++)
243 depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil;
244 }
245
246
247 /**
248 * Load tile color from the framebuffer surface.
249 * This is a bin command called during bin processing.
250 */
251 void lp_rast_load_color( struct lp_rasterizer *rast,
252 unsigned thread_index,
253 const union lp_rast_cmd_arg arg)
254 {
255 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
256
257 /* call u_tile func to load colors from surface */
258 }
259
260
261 /**
262 * Load tile z/stencil from the framebuffer surface.
263 * This is a bin command called during bin processing.
264 */
265 void lp_rast_load_zstencil( struct lp_rasterizer *rast,
266 unsigned thread_index,
267 const union lp_rast_cmd_arg arg )
268 {
269 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
270
271 /* call u_tile func to load depth (and stencil?) from surface */
272 }
273
274
275 void lp_rast_set_state( struct lp_rasterizer *rast,
276 unsigned thread_index,
277 const union lp_rast_cmd_arg arg )
278 {
279 const struct lp_rast_state *state = arg.set_state;
280
281 LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
282
283 /* just set the current state pointer for this rasterizer */
284 rast->tasks[thread_index].current_state = state;
285 }
286
287
288
289 /* Within a tile:
290 */
291
292 /**
293 * Run the shader on all blocks in a tile. This is used when a tile is
294 * completely contained inside a triangle.
295 * This is a bin command called during bin processing.
296 */
297 void lp_rast_shade_tile( struct lp_rasterizer *rast,
298 unsigned thread_index,
299 const union lp_rast_cmd_arg arg )
300 {
301 const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
302 const unsigned tile_x = rast->tasks[thread_index].x;
303 const unsigned tile_y = rast->tasks[thread_index].y;
304 const unsigned mask = ~0;
305 unsigned x, y;
306
307 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
308
309 /* Use the existing preference for 4x4 (four quads) shading:
310 */
311 for (y = 0; y < TILE_SIZE; y += 4)
312 for (x = 0; x < TILE_SIZE; x += 4)
313 lp_rast_shade_quads( rast,
314 thread_index,
315 inputs,
316 tile_x + x,
317 tile_y + y,
318 mask);
319 }
320
321
322 /**
323 * Compute shading for a 4x4 block of pixels.
324 * This is a bin command called during bin processing.
325 */
326 void lp_rast_shade_quads( struct lp_rasterizer *rast,
327 unsigned thread_index,
328 const struct lp_rast_shader_inputs *inputs,
329 unsigned x, unsigned y,
330 unsigned mask)
331 {
332 #if 1
333 const struct lp_rast_state *state = rast->tasks[thread_index].current_state;
334 struct lp_rast_tile *tile = &rast->tasks[thread_index].tile;
335 void *color;
336 void *depth;
337 uint32_t ALIGN16_ATTRIB masks[2][2][2][2];
338 unsigned ix, iy;
339 int block_offset;
340
341 assert(state);
342
343 /* Sanity checks */
344 assert(x % TILE_VECTOR_WIDTH == 0);
345 assert(y % TILE_VECTOR_HEIGHT == 0);
346
347 /* mask: the rasterizer wants to treat pixels in 4x4 blocks, but
348 * the pixel shader wants to swizzle them into 4 2x2 quads.
349 *
350 * Additionally, the pixel shader wants masks as full dword ~0,
351 * while the rasterizer wants to pack per-pixel bits tightly.
352 */
353 #if 0
354 unsigned qx, qy;
355 for (qy = 0; qy < 2; ++qy)
356 for (qx = 0; qx < 2; ++qx)
357 for (iy = 0; iy < 2; ++iy)
358 for (ix = 0; ix < 2; ++ix)
359 masks[qy][qx][iy][ix] = mask & (1 << (qy*8+iy*4+qx*2+ix)) ? ~0 : 0;
360 #else
361 masks[0][0][0][0] = mask & (1 << (0*8+0*4+0*2+0)) ? ~0 : 0;
362 masks[0][0][0][1] = mask & (1 << (0*8+0*4+0*2+1)) ? ~0 : 0;
363 masks[0][0][1][0] = mask & (1 << (0*8+1*4+0*2+0)) ? ~0 : 0;
364 masks[0][0][1][1] = mask & (1 << (0*8+1*4+0*2+1)) ? ~0 : 0;
365 masks[0][1][0][0] = mask & (1 << (0*8+0*4+1*2+0)) ? ~0 : 0;
366 masks[0][1][0][1] = mask & (1 << (0*8+0*4+1*2+1)) ? ~0 : 0;
367 masks[0][1][1][0] = mask & (1 << (0*8+1*4+1*2+0)) ? ~0 : 0;
368 masks[0][1][1][1] = mask & (1 << (0*8+1*4+1*2+1)) ? ~0 : 0;
369
370 masks[1][0][0][0] = mask & (1 << (1*8+0*4+0*2+0)) ? ~0 : 0;
371 masks[1][0][0][1] = mask & (1 << (1*8+0*4+0*2+1)) ? ~0 : 0;
372 masks[1][0][1][0] = mask & (1 << (1*8+1*4+0*2+0)) ? ~0 : 0;
373 masks[1][0][1][1] = mask & (1 << (1*8+1*4+0*2+1)) ? ~0 : 0;
374 masks[1][1][0][0] = mask & (1 << (1*8+0*4+1*2+0)) ? ~0 : 0;
375 masks[1][1][0][1] = mask & (1 << (1*8+0*4+1*2+1)) ? ~0 : 0;
376 masks[1][1][1][0] = mask & (1 << (1*8+1*4+1*2+0)) ? ~0 : 0;
377 masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0;
378 #endif
379
380 assert((x % 2) == 0);
381 assert((y % 2) == 0);
382
383 ix = x % TILE_SIZE;
384 iy = y % TILE_SIZE;
385
386 /* offset of the 16x16 pixel block within the tile */
387 block_offset = ((iy/4)*(16*16) + (ix/4)*16);
388
389 /* color buffer */
390 color = tile->color + 4 * block_offset;
391
392 /* depth buffer */
393 depth = tile->depth + block_offset;
394
395 /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
396 assert(lp_check_alignment(masks, 16));
397
398 assert(lp_check_alignment(depth, 16));
399 assert(lp_check_alignment(color, 16));
400 assert(lp_check_alignment(state->jit_context.blend_color, 16));
401
402 /* run shader */
403 state->jit_function( &state->jit_context,
404 x, y,
405 inputs->a0,
406 inputs->dadx,
407 inputs->dady,
408 &masks[0][0][0][0],
409 color,
410 depth);
411 #else
412 struct lp_rast_tile *tile = &rast->tile;
413 unsigned chan_index;
414 unsigned q, ix, iy;
415
416 x %= TILE_SIZE;
417 y %= TILE_SIZE;
418
419 /* mask */
420 for (q = 0; q < 4; ++q)
421 for(iy = 0; iy < 2; ++iy)
422 for(ix = 0; ix < 2; ++ix)
423 if(masks[q] & (1 << (iy*2 + ix)))
424 for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
425 TILE_PIXEL(tile->color, x + q*2 + ix, y + iy, chan_index) = 0xff;
426
427 #endif
428 }
429
430
431 /* End of tile:
432 */
433
434
435 /**
436 * Write the rasterizer's color tile to the framebuffer.
437 */
438 static void lp_rast_store_color( struct lp_rasterizer *rast,
439 unsigned thread_index)
440 {
441 const unsigned x = rast->tasks[thread_index].x;
442 const unsigned y = rast->tasks[thread_index].y;
443 int w = TILE_SIZE;
444 int h = TILE_SIZE;
445
446 if (x + w > rast->state.fb.width)
447 w -= x + w - rast->state.fb.width;
448
449 if (y + h > rast->state.fb.height)
450 h -= y + h - rast->state.fb.height;
451
452 assert(w >= 0);
453 assert(h >= 0);
454 assert(w <= TILE_SIZE);
455 assert(h <= TILE_SIZE);
456
457 LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
458 thread_index, x, y, w, h);
459
460 lp_tile_write_4ub(rast->cbuf_transfer->format,
461 rast->tasks[thread_index].tile.color,
462 rast->cbuf_map,
463 rast->cbuf_transfer->stride,
464 x, y,
465 w, h);
466 }
467
468
/**
 * Copy a w x h rectangle of 32-bit values into a destination surface.
 *
 * \param src         source values, read consecutively (w values per row,
 *                    rows back to back)
 * \param dst         start of the destination surface
 * \param dst_stride  distance between destination rows, in bytes
 * \param x0, y0      destination position of the rectangle, in pixels
 * \param w, h        size of the rectangle, in pixels
 *
 * NOTE(review): because src is consumed as a packed w*h array, a caller
 * passing w < its own source row length gets rows taken from the wrong
 * place -- verify against the depth tile layout for clipped tiles.
 */
static void
lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride,
                  unsigned x0, unsigned y0, unsigned w, unsigned h)
{
   unsigned row, col;

   for (row = 0; row < h; ++row) {
      /* 4 bytes per destination pixel */
      uint32_t *out = (uint32_t *)(dst + (y0 + row)*dst_stride + x0*4);

      for (col = 0; col < w; ++col) {
         out[col] = src[col];
      }
      src += w;
   }
}
483
484 /**
485 * Write the rasterizer's z/stencil tile to the framebuffer.
486 */
487 static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
488 unsigned thread_index )
489 {
490 const unsigned x = rast->tasks[thread_index].x;
491 const unsigned y = rast->tasks[thread_index].y;
492 unsigned w = TILE_SIZE;
493 unsigned h = TILE_SIZE;
494
495 if (x + w > rast->state.fb.width)
496 w -= x + w - rast->state.fb.width;
497
498 if (y + h > rast->state.fb.height)
499 h -= y + h - rast->state.fb.height;
500
501 LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
502
503 assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM);
504 lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
505 rast->zsbuf_map,
506 rast->zsbuf_transfer->stride,
507 x, y, w, h);
508 }
509
510
511 /**
512 * Write the rasterizer's tiles to the framebuffer.
513 */
514 static void
515 lp_rast_end_tile( struct lp_rasterizer *rast,
516 unsigned thread_index )
517 {
518 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
519
520 if (rast->state.write_color)
521 lp_rast_store_color(rast, thread_index);
522
523 if (rast->state.write_zstencil)
524 lp_rast_store_zstencil(rast, thread_index);
525 }
526
527
528 /**
529 * Rasterize commands for a single bin.
530 * \param x, y position of the bin's tile in the framebuffer
531 * Must be called between lp_rast_begin() and lp_rast_end().
532 * Called per thread.
533 */
534 static void
535 rasterize_bin( struct lp_rasterizer *rast,
536 unsigned thread_index,
537 const struct cmd_bin *bin,
538 int x, int y)
539 {
540 const struct cmd_block_list *commands = &bin->commands;
541 struct cmd_block *block;
542 unsigned k;
543
544 lp_rast_start_tile( rast, thread_index, x, y );
545
546 /* simply execute each of the commands in the block list */
547 for (block = commands->head; block; block = block->next) {
548 for (k = 0; k < block->count; k++) {
549 block->cmd[k]( rast, thread_index, block->arg[k] );
550 }
551 }
552
553 lp_rast_end_tile( rast, thread_index );
554 }
555
556
557 /**
558 * Rasterize/execute all bins.
559 * Called per thread.
560 */
561 static void
562 rasterize_bins( struct lp_rasterizer *rast,
563 unsigned thread_index,
564 struct lp_bins *bins,
565 bool write_depth )
566 {
567 /* loop over tile bins, rasterize each */
568 #if 0
569 {
570 unsigned i, j;
571 for (i = 0; i < bins->tiles_x; i++) {
572 for (j = 0; j < bins->tiles_y; j++) {
573 struct cmd_bin *bin = lp_get_bin(bins, i, j);
574 rasterize_bin( rast, thread_index,
575 bin, i * TILE_SIZE, j * TILE_SIZE );
576 }
577 }
578 }
579 #else
580 {
581 struct cmd_bin *bin;
582 int x, y;
583
584 assert(bins);
585 while ((bin = lp_bin_iter_next(bins, &x, &y))) {
586 rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE);
587 }
588 }
589 #endif
590 }
591
592
593 /**
594 * Called by setup module when it has something for us to render.
595 */
596 void
597 lp_rasterize_bins( struct lp_rasterizer *rast,
598 struct lp_bins *bins,
599 const struct pipe_framebuffer_state *fb,
600 bool write_depth )
601 {
602 boolean debug = false;
603
604 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
605
606 if (debug) {
607 unsigned x, y;
608 printf("rasterize bins:\n");
609 printf(" data size: %u\n", lp_bin_data_size(bins));
610 for (y = 0; y < bins->tiles_y; y++) {
611 for (x = 0; x < bins->tiles_x; x++) {
612 printf(" bin %u, %u size: %u\n", x, y,
613 lp_bin_cmd_size(bins, x, y));
614 }
615 }
616 }
617
618 lp_rast_begin( rast, fb,
619 fb->cbufs[0]!= NULL,
620 fb->zsbuf != NULL && write_depth );
621
622 if (rast->num_threads == 0) {
623 /* no threading */
624 lp_bin_iter_begin( bins );
625 rasterize_bins( rast, 0, bins, write_depth );
626
627 /* reset bins and put into the empty queue */
628 lp_reset_bins( bins );
629 lp_bins_enqueue( rast->empty_bins, bins);
630 }
631 else {
632 /* threaded rendering! */
633 unsigned i;
634
635 lp_bins_enqueue( rast->full_bins, bins );
636
637 /* XXX need to move/fix these */
638 rast->fb = fb;
639 rast->write_depth = write_depth;
640
641 /*lp_bin_iter_begin( bins );*/
642
643 /* signal the threads that there's work to do */
644 for (i = 0; i < rast->num_threads; i++) {
645 pipe_semaphore_signal(&rast->tasks[i].work_ready);
646 }
647
648 /* wait for work to complete */
649 for (i = 0; i < rast->num_threads; i++) {
650 pipe_semaphore_wait(&rast->tasks[i].work_done);
651 }
652 }
653
654 lp_rast_end( rast );
655
656 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
657 }
658
659
660 /**
661 * This is the thread's main entrypoint.
662 * It's a simple loop:
663 * 1. wait for work
664 * 2. do work
665 * 3. signal that we're done
666 */
667 static void *
668 thread_func( void *init_data )
669 {
670 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
671 struct lp_rasterizer *rast = task->rast;
672 boolean debug = false;
673
674 while (1) {
675 struct lp_bins *bins;
676
677 /* wait for work */
678 if (debug)
679 debug_printf("thread %d waiting for work\n", task->thread_index);
680 pipe_semaphore_wait(&task->work_ready);
681
682 bins = get_next_full_bin( rast );
683 assert(bins);
684
685 /* do work */
686 if (debug)
687 debug_printf("thread %d doing work\n", task->thread_index);
688 rasterize_bins(rast, task->thread_index,
689 bins, rast->write_depth);
690
691 release_current_bin( rast );
692
693 /* signal done with work */
694 if (debug)
695 debug_printf("thread %d done working\n", task->thread_index);
696 pipe_semaphore_signal(&task->work_done);
697 }
698
699 return NULL;
700 }
701
702
703 /**
704 * Initialize semaphores and spawn the threads.
705 */
706 static void
707 create_rast_threads(struct lp_rasterizer *rast)
708 {
709 unsigned i;
710
711 rast->num_threads = util_cpu_caps.nr_cpus;
712 rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads);
713 rast->num_threads = MIN2(rast->num_threads, MAX_THREADS);
714
715 /* NOTE: if num_threads is zero, we won't use any threads */
716 for (i = 0; i < rast->num_threads; i++) {
717 pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
718 pipe_semaphore_init(&rast->tasks[i].work_done, 0);
719 rast->threads[i] = pipe_thread_create(thread_func,
720 (void *) &rast->tasks[i]);
721 }
722 }
723
724
725
726 /**
727 * Create new lp_rasterizer.
728 * \param empty the queue to put empty bins on after we've finished
729 * processing them.
730 */
731 struct lp_rasterizer *
732 lp_rast_create( struct pipe_screen *screen, struct lp_bins_queue *empty )
733 {
734 struct lp_rasterizer *rast;
735 unsigned i;
736
737 rast = CALLOC_STRUCT(lp_rasterizer);
738 if(!rast)
739 return NULL;
740
741 rast->screen = screen;
742
743 rast->empty_bins = empty;
744 rast->full_bins = lp_bins_queue_create();
745
746 for (i = 0; i < Elements(rast->tasks); i++) {
747 rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
748 rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 );
749 rast->tasks[i].rast = rast;
750 rast->tasks[i].thread_index = i;
751 }
752
753 create_rast_threads(rast);
754
755 return rast;
756 }
757
758
759 /* Shutdown:
760 */
761 void lp_rast_destroy( struct lp_rasterizer *rast )
762 {
763 unsigned i;
764
765 util_unreference_framebuffer_state(&rast->state.fb);
766
767 for (i = 0; i < Elements(rast->tasks); i++) {
768 align_free(rast->tasks[i].tile.depth);
769 align_free(rast->tasks[i].tile.color);
770 }
771
772 FREE(rast);
773 }
774