X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_rast.c;h=e2c1b6d5cbaccd56d5bbdf36ecb04e246f34b247;hb=c7f5c9a3dc6350252e73b541bb85ab3ed9e64a9c;hp=5ae323fd96cd2b0847aed2dae8f351b09831a6ba;hpb=653a83445f94620673f747a4ace6847a2c7fdb4d;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 5ae323fd96c..e2c1b6d5cba 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -28,7 +28,6 @@ #include #include "util/u_memory.h" #include "util/u_math.h" -#include "util/u_cpu_detect.h" #include "util/u_surface.h" #include "lp_scene_queue.h" @@ -43,110 +42,90 @@ /** - * Begin the rasterization phase. - * Map the framebuffer surfaces. Initialize the 'rast' state. + * Begin rasterizing a scene. + * Called once per scene by one thread. */ -static boolean +static void lp_rast_begin( struct lp_rasterizer *rast, - const struct pipe_framebuffer_state *fb, - boolean write_color, - boolean write_zstencil ) + struct lp_scene *scene ) { - struct pipe_screen *screen = rast->screen; - struct pipe_surface *cbuf, *zsbuf; + const struct pipe_framebuffer_state *fb = &scene->fb; int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - util_copy_framebuffer_state(&rast->state.fb, fb); - - rast->state.write_zstencil = write_zstencil; - rast->state.write_color = write_color; + rast->curr_scene = scene; - rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || - fb->height % TILE_SIZE != 0); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->state.nr_cbufs = scene->fb.nr_cbufs; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - cbuf = rast->state.fb.cbufs[i]; - if (cbuf) { - rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!rast->cbuf_transfer[i]) - goto fail; - - rast->cbuf_map[i] = screen->transfer_map(rast->screen, - rast->cbuf_transfer[i]); - if (!rast->cbuf_map[i]) - goto fail; - } + for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + rast->cbuf[i].format = cbuf->texture->format; + rast->cbuf[i].tiles_per_row = align(cbuf->width, TILE_SIZE) / TILE_SIZE; + rast->cbuf[i].blocksize = + util_format_get_blocksize(cbuf->texture->format); + rast->cbuf[i].map = llvmpipe_resource_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); } - zsbuf = rast->state.fb.zsbuf; - if (zsbuf) { - rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!rast->zsbuf_transfer) - goto fail; - - rast->zsbuf_map = screen->transfer_map(rast->screen, - rast->zsbuf_transfer); - if (!rast->zsbuf_map) - goto fail; + if (fb->zsbuf) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level); + rast->zsbuf.blocksize = + util_format_get_blocksize(zsbuf->texture->format); + + rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_LAYOUT_NONE); + assert(rast->zsbuf.map); } - return TRUE; - -fail: - /* Unmap and release transfers? - */ - return FALSE; + lp_scene_bin_iter_begin( scene ); } -/** - * Finish the rasterization phase. - * Unmap framebuffer surfaces. - */ static void lp_rast_end( struct lp_rasterizer *rast ) { - struct pipe_screen *screen = rast->screen; + struct lp_scene *scene = rast->curr_scene; unsigned i; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - if (rast->cbuf_map[i]) - screen->transfer_unmap(screen, rast->cbuf_transfer[i]); - - if (rast->cbuf_transfer[i]) - screen->tex_transfer_destroy(rast->cbuf_transfer[i]); + /* Unmap color buffers */ + for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_resource_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + rast->cbuf[i].map = NULL; + } - rast->cbuf_transfer[i] = NULL; - rast->cbuf_map[i] = NULL; + /* Unmap z/stencil buffer */ + if (rast->zsbuf.map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + rast->zsbuf.map = NULL; } - if (rast->zsbuf_map) - screen->transfer_unmap(screen, rast->zsbuf_transfer); + lp_scene_reset( rast->curr_scene ); - if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->zsbuf_transfer); + rast->curr_scene = NULL; - rast->zsbuf_transfer = NULL; - rast->zsbuf_map = NULL; +#ifdef DEBUG + if (0) + debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n", + lp_tile_unswizzle_count, lp_tile_swizzle_count); +#endif } @@ -156,14 +135,71 @@ lp_rast_end( struct lp_rasterizer *rast ) * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned thread_index, - unsigned x, unsigned y ) +lp_rast_tile_begin(struct lp_rasterizer_task *task, + unsigned x, unsigned y) { + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + enum lp_texture_usage usage; + unsigned buf; + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->tasks[thread_index].x = x; - rast->tasks[thread_index].y = y; + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + + task->x = x; + task->y = y; + + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* get pointers to color tile(s) */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_resource *lpt; + assert(cbuf); + lpt = llvmpipe_resource(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + usage, + x, y); + assert(task->color_tiles[buf]); + } + + /* get pointer to depth/stencil tile */ + { + struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf; + if (zsbuf) { + struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture); + + if (scene->has_depth_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + + /* "prime" the tile: convert data from linear to tiled if necessary + * and update the tile's layout info. + */ + (void) llvmpipe_get_texture_tile(lpt, + zsbuf->face + zsbuf->zslice, + zsbuf->level, + usage, + x, y); + /* Get actual pointer to the tile data. Note that depth/stencil + * data is tiled differently than color data. + */ + task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y); + + assert(task->depth_tile); + } + else { + task->depth_tile = NULL; + } + } } @@ -171,12 +207,13 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * Clear the rasterizer's current color tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_clear_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { + struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = rast->tasks[thread_index].tile.color; + unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -189,8 +226,9 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); + for (i = 0; i < rast->state.nr_cbufs; i++) { + uint8_t *ptr = task->color_tiles[i]; + memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } else { @@ -200,9 +238,10 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * works. */ const unsigned chunk = TILE_SIZE / 4; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - uint8_t *c = color_tile[i]; + for (i = 0; i < rast->state.nr_cbufs; i++) { + uint8_t *c = task->color_tiles[i]; unsigned j; + for (j = 0; j < 4 * TILE_SIZE; j++) { memset(c, clear_color[0], chunk); c += chunk; @@ -213,7 +252,6 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, memset(c, clear_color[3], chunk); c += chunk; } - assert(c - color_tile[i] == TILE_SIZE * TILE_SIZE * 4); } } @@ -225,17 +263,56 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_clear_zstencil(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - unsigned i; - uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; - + struct lp_rasterizer *rast = task->rast; + const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; + const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; + const unsigned block_size = rast->zsbuf.blocksize; + const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; + uint8_t *dst; + unsigned i, j; + LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - for (i = 0; i < TILE_SIZE * TILE_SIZE; i++) - depth_tile[i] = arg.clear_zstencil; + /* + * Clear the aera of the swizzled depth/depth buffer matching this tile, in + * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time. + * + * The swizzled depth format is such that the depths for + * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets. + */ + + dst = task->depth_tile; + + assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y)); + + switch (block_size) { + case 1: + memset(dst, (uint8_t) arg.clear_zstencil, height * width); + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) arg.clear_zstencil; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = arg.clear_zstencil; + dst += dst_stride; + } + break; + default: + assert(0); + break; + } } @@ -243,121 +320,99 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. */ -void lp_rast_load_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +#if 0 +void +lp_rast_load_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; - unsigned i; + struct lp_rasterizer *rast = task->rast; + unsigned buf; + enum lp_texture_usage usage; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; + if (scene->has_color_clear) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; - if (x >= transfer->width) - continue; - - if (y >= transfer->height) - continue; - - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(transfer->texture->format, - task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, - x, y, - w, h); - - LP_COUNT(nr_color_tile_load); + /* Get pointers to color tile(s). + * This will convert linear data to tiled if needed. + */ + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf]; + struct llvmpipe_texture *lpt; + assert(cbuf); + lpt = llvmpipe_texture(cbuf->texture); + task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt, + cbuf->face + cbuf->zslice, + cbuf->level, + usage, + task->x, task->y); + assert(task->color_tiles[buf]); } } +#endif -static void -lp_tile_read_z32(uint32_t *tile, - const uint8_t *map, - unsigned map_stride, - unsigned x0, unsigned y0, unsigned w, unsigned h) +/** + * Convert the color tile from tiled to linear layout. + * This is generally only done when we're flushing the scene just prior to + * SwapBuffers. If we didn't do this here, we'd have to convert the entire + * tiled color buffer to linear layout in the llvmpipe_texture_unmap() + * function. It's better to do it here to take advantage of + * threading/parallelism. + * This is a bin command which is stored in all bins. + */ +void +lp_rast_store_color( struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - unsigned x, y; - const uint8_t *map_row = map + y0*map_stride; - for (y = 0; y < h; ++y) { - const uint32_t *map_pixel = (uint32_t *)(map_row + x0*4); - for (x = 0; x < w; ++x) { - *tile++ = *map_pixel++; - } - map_row += map_stride; + struct lp_rasterizer *rast = task->rast; + struct lp_scene *scene = rast->curr_scene; + unsigned buf; + + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + struct pipe_surface *cbuf = scene->fb.cbufs[buf]; + const unsigned face = cbuf->face, level = cbuf->level; + struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); + /* this will convert the tiled data to linear if needed */ + (void) llvmpipe_get_texture_tile_linear(lpt, face, level, + LP_TEX_USAGE_READ, + task->x, task->y); } } + /** - * Load tile z/stencil from the framebuffer surface. * This is a bin command called during bin processing. */ -void lp_rast_load_zstencil( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) -{ - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; - - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; - - LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - - assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_read_z32(task->tile.depth, - rast->zsbuf_map, - rast->zsbuf_transfer->stride, - x, y, w, h); -} - - -void lp_rast_set_state( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { const struct lp_rast_state *state = arg.set_state; LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->tasks[thread_index].current_state = state; + task->current_state = state; } - /** * Run the shader on all blocks in a tile. This is used when a tile is * completely contained inside a triangle. * This is a bin command called during bin processing. */ -void lp_rast_shade_tile( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_shade_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = task->x; - const unsigned tile_y = task->y; + const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -367,28 +422,27 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, for (x = 0; x < TILE_SIZE; x += 4) { uint8_t *color[PIPE_MAX_COLOR_BUFS]; uint32_t *depth; - unsigned block_offset, i; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); + unsigned i; /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + for (i = 0; i < rast->state.nr_cbufs; i++) + color[i] = lp_rast_get_color_block_pointer(task, i, + tile_x + x, tile_y + y); /* depth buffer */ - depth = tile->depth + block_offset; - - /* run shader */ - state->jit_function[0]( &state->jit_context, - tile_x + x, tile_y + y, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - INT_MIN, INT_MIN, INT_MIN, - NULL, NULL, NULL ); + depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y); + + /* run shader on 4x4 block */ + state->jit_function[RAST_WHOLE]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + INT_MIN, INT_MIN, INT_MIN, + NULL, NULL, NULL ); } } } @@ -397,23 +451,20 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, /** * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. + * \param x X position of quad in window coords + * \param y Y position of quad in window coords */ -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const struct lp_rast_state *state = task->current_state; - struct lp_rast_tile *tile = &task->tile; + struct lp_rasterizer *rast = task->rast; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; unsigned i; - unsigned ix, iy; - int block_offset; -#ifdef DEBUG assert(state); /* Sanity checks */ @@ -422,43 +473,36 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, assert((x % 4) == 0); assert((y % 4) == 0); -#endif - - ix = x % TILE_SIZE; - iy = y % TILE_SIZE; - - /* offset of the 16x16 pixel block within the tile */ - block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) - color[i] = tile->color[i] + 4 * block_offset; + for (i = 0; i < rast->state.nr_cbufs; i++) { + color[i] = lp_rast_get_color_block_pointer(task, i, x, y); + assert(lp_check_alignment(color[i], 16)); + } /* depth buffer */ - depth = tile->depth + block_offset; + depth = lp_rast_get_depth_block_pointer(rast, x, y); - -#ifdef DEBUG - assert(lp_check_alignment(tile->depth, 16)); - assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); -#endif - /* run shader */ - state->jit_function[1]( &state->jit_context, - x, y, - inputs->a0, - inputs->dadx, - inputs->dady, - color, - depth, - c1, c2, c3, - inputs->step[0], inputs->step[1], inputs->step[2]); + /* run shader on 4x4 block */ + state->jit_function[RAST_EDGE_TEST]( &state->jit_context, + x, y, + inputs->facing, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + c1, c2, c3, + inputs->step[0], + inputs->step[1], + inputs->step[2]); } @@ -516,145 +560,48 @@ outline_subtiles(uint8_t *tile) /** - * Write the rasterizer's color tile to the framebuffer. + * Called when we're done writing to a color tile. */ -static void lp_rast_store_color( struct lp_rasterizer *rast, - unsigned thread_index) +static void +lp_rast_tile_end(struct lp_rasterizer_task *task) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; - unsigned i; - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) - continue; - - if (y >= transfer->height) - continue; +#if DEBUG + struct lp_rasterizer *rast = task->rast; + unsigned buf; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - thread_index, x, y, w, h); + for (buf = 0; buf < rast->state.nr_cbufs; buf++) { + uint8_t *color = lp_rast_get_color_block_pointer(task, buf, + task->x, task->y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) - outline_subtiles(task->tile.color[i]); + outline_subtiles(color); else if (LP_DEBUG & DEBUG_SHOW_TILES) - outline_tile(task->tile.color[i]); - - lp_tile_write_4ub(transfer->texture->format, - task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, - x, y, - w, h); - - LP_COUNT(nr_color_tile_store); + outline_tile(color); } -} - +#else + (void) outline_subtiles; +#endif -static void -lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, - unsigned x0, unsigned y0, unsigned w, unsigned h) -{ - unsigned x, y; - uint8_t *dst_row = dst + y0*dst_stride; - for (y = 0; y < h; ++y) { - uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); - for (x = 0; x < w; ++x) { - *dst_pixel++ = *src++; - } - dst_row += dst_stride; - } + /* debug */ + memset(task->color_tiles, 0, sizeof(task->color_tiles)); + task->depth_tile = NULL; } -/** - * Write the rasterizer's z/stencil tile to the framebuffer. - */ -static void lp_rast_store_zstencil( struct lp_rasterizer *rast, - unsigned thread_index ) -{ - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; - unsigned w = TILE_SIZE; - unsigned h = TILE_SIZE; - - if (x + w > rast->state.fb.width) - w -= x + w - rast->state.fb.width; - - if (y + h > rast->state.fb.height) - h -= y + h - rast->state.fb.height; - - LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - - assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); - lp_tile_write_z32(task->tile.depth, - rast->zsbuf_map, - rast->zsbuf_transfer->stride, - x, y, w, h); -} - - -/** - * Write the rasterizer's tiles to the framebuffer. - */ -static void -lp_rast_end_tile( struct lp_rasterizer *rast, - unsigned thread_index ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - if (rast->state.write_color) - lp_rast_store_color(rast, thread_index); - - if (rast->state.write_zstencil) - lp_rast_store_zstencil(rast, thread_index); -} /** * Signal on a fence. This is called during bin execution/rasterization. * Called per thread. */ -void lp_rast_fence( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_fence(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_fence *fence = arg.fence; - - pipe_mutex_lock( fence->mutex ); - - fence->count++; - assert(fence->count <= fence->rank); - - LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, - fence->count, fence->rank); - - pipe_condvar_signal( fence->signalled ); - - pipe_mutex_unlock( fence->mutex ); + lp_fence_signal(fence); } -/** - * When all the threads are done rasterizing a scene, one thread will - * call this function to reset the scene and put it onto the empty queue. - */ -static void -release_scene( struct lp_rasterizer *rast, - struct lp_scene *scene ) -{ - util_unreference_framebuffer_state( &scene->fb ); - - lp_scene_reset( scene ); - lp_scene_enqueue( rast->empty_scenes, scene ); - rast->curr_scene = NULL; -} /** @@ -664,25 +611,28 @@ release_scene( struct lp_rasterizer *rast, * Called per thread. */ static void -rasterize_bin( struct lp_rasterizer *rast, - unsigned thread_index, - const struct cmd_bin *bin, - int x, int y) +rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, thread_index, x, y ); + lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, thread_index, block->arg[k] ); + block->cmd[k]( task, block->arg[k] ); } } - lp_rast_end_tile( rast, thread_index ); + lp_rast_tile_end(task); + + /* Free data for this bin. + */ + lp_scene_bin_reset( task->rast->curr_scene, x, y); } @@ -693,13 +643,12 @@ static struct { const char *name; } cmd_names[] = { - RAST(load_color), - RAST(load_zstencil), RAST(clear_color), RAST(clear_zstencil), RAST(triangle), RAST(shade_tile), RAST(set_state), + RAST(store_color), RAST(fence), }; @@ -752,9 +701,7 @@ is_empty_bin( const struct cmd_bin *bin ) } for (i = 0; i < head->count; i++) - if (head->cmd[i] != lp_rast_load_color && - head->cmd[i] != lp_rast_load_zstencil && - head->cmd[i] != lp_rast_set_state) { + if (head->cmd[i] != lp_rast_set_state) { return FALSE; } @@ -768,10 +715,8 @@ is_empty_bin( const struct cmd_bin *bin ) * Called per thread. */ static void -rasterize_scene( struct lp_rasterizer *rast, - unsigned thread_index, - struct lp_scene *scene, - bool write_depth ) +rasterize_scene(struct lp_rasterizer_task *task, + struct lp_scene *scene) { /* loop over scene bins, rasterize each */ #if 0 @@ -779,9 +724,8 @@ rasterize_scene( struct lp_rasterizer *rast, unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(scene, i, j); - rasterize_bin( rast, thread_index, - bin, i * TILE_SIZE, j * TILE_SIZE ); + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + rasterize_bin(task, bin, i, j); } } } @@ -793,7 +737,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { if (!is_empty_bin( bin )) - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin(task, bin, x, y); } } #endif @@ -804,44 +748,23 @@ rasterize_scene( struct lp_rasterizer *rast, * Called by setup module when it has something for us to render. */ void -lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ) +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene) { - boolean debug = false; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (debug) { - unsigned x, y; - debug_printf("rasterize scene:\n"); - debug_printf(" data size: %u\n", lp_scene_data_size(scene)); - for (y = 0; y < scene->tiles_y; y++) { - for (x = 0; x < scene->tiles_x; x++) { - debug_printf(" bin %u, %u size: %u\n", x, y, - lp_scene_bin_size(scene, x, y)); - } - } - } - - /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&scene->fb, fb); - scene->write_depth = write_depth; - if (rast->num_threads == 0) { /* no threading */ - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, /* always write color if cbufs present */ - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, scene ); - lp_scene_bin_iter_begin( scene ); - rasterize_scene( rast, 0, scene, write_depth ); + rasterize_scene( &rast->tasks[0], scene ); - release_scene( rast, scene ); + lp_scene_reset( scene ); lp_rast_end( rast ); + + rast->curr_scene = NULL; } else { /* threaded rendering! */ @@ -853,14 +776,26 @@ lp_rasterize_scene( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +void +lp_rast_finish( struct lp_rasterizer *rast ) +{ + if (rast->num_threads == 0) { + /* nothing to do */ + } + else { + int i; /* wait for work to complete */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } } - - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -883,24 +818,16 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + if (rast->exit_flag) + break; + if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize * - map the framebuffer surfaces */ - const struct pipe_framebuffer_state *fb; - boolean write_depth; - - rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); - - lp_scene_bin_iter_begin( rast->curr_scene ); - - fb = &rast->curr_scene->fb; - write_depth = rast->curr_scene->write_depth; - - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, + lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't @@ -911,26 +838,23 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_scene(rast, - task->thread_index, - rast->curr_scene, - rast->curr_scene->write_depth); + + rasterize_scene(task, + rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); + /* XXX: shouldn't be necessary: + */ if (task->thread_index == 0) { - /* thread[0]: - * - release the scene object - * - unmap the framebuffer surfaces - */ - release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); } @@ -946,16 +870,6 @@ create_rast_threads(struct lp_rasterizer *rast) { unsigned i; -#ifdef PIPE_OS_WINDOWS - /* Multithreading not supported on windows until conditions and barriers are - * properly implemented. */ - rast->num_threads = 0; -#else - rast->num_threads = util_cpu_caps.nr_cpus; - rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); - rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); -#endif - /* NOTE: if num_threads is zero, we won't use any threads */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_init(&rast->tasks[i].work_ready, 0); @@ -968,36 +882,30 @@ create_rast_threads(struct lp_rasterizer *rast) /** - * Create new lp_rasterizer. - * \param empty the queue to put empty scenes on after we've finished - * processing them. + * Create new lp_rasterizer. If num_threads is zero, don't create any + * new threads, do rendering synchronously. + * \param num_threads number of rasterizer threads to create */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +lp_rast_create( unsigned num_threads ) { struct lp_rasterizer *rast; - unsigned i, cbuf; + unsigned i; rast = CALLOC_STRUCT(lp_rasterizer); if(!rast) return NULL; - rast->screen = screen; - - rast->empty_scenes = empty; rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; - - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - task->tile.color[cbuf] = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); - - task->tile.depth = align_malloc(TILE_SIZE * TILE_SIZE * 4, 16); task->rast = rast; task->thread_index = i; } + rast->num_threads = num_threads; + create_rast_threads(rast); /* for synchronizing rasterization threads */ @@ -1011,14 +919,26 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) */ void lp_rast_destroy( struct lp_rasterizer *rast ) { - unsigned i, cbuf; + unsigned i; - util_unreference_framebuffer_state(&rast->state.fb); + /* Set exit_flag and signal each thread's work_ready semaphore. + * Each thread will be woken up, notice that the exit_flag is set and + * break out of its main loop. The thread will then exit. + */ + rast->exit_flag = TRUE; + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } - for (i = 0; i < Elements(rast->tasks); i++) { - align_free(rast->tasks[i].tile.depth); - for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) - align_free(rast->tasks[i].tile.color[cbuf]); + /* Wait for threads to terminate before cleaning up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_thread_wait(rast->threads[i]); + } + + /* Clean up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_destroy(&rast->tasks[i].work_ready); + pipe_semaphore_destroy(&rast->tasks[i].work_done); } /* for synchronizing rasterization threads */