task->bin = bin;
task->x = x * TILE_SIZE;
task->y = y * TILE_SIZE;
+ task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ?
+ task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE;
+ task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ?
+ task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE;
/* reset pointers to color and depth tile(s) */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
scene->cbufs[i].stride,
task->x,
task->y,
- TILE_SIZE,
- TILE_SIZE,
+ task->width,
+ task->height,
&uc);
}
}
scene->cbufs[i].stride,
task->x,
task->y,
- TILE_SIZE,
- TILE_SIZE,
+ task->width,
+ task->height,
&uc);
}
}
uint64_t clear_mask64 = arg.clear_zstencil.mask;
uint32_t clear_value = (uint32_t) clear_value64;
uint32_t clear_mask = (uint32_t) clear_mask64;
- const unsigned height = TILE_SIZE;
- const unsigned width = TILE_SIZE;
+ const unsigned height = task->height;
+ const unsigned width = task->width;
const unsigned block_size = scene->zsbuf.blocksize;
const unsigned dst_stride = scene->zsbuf.stride;
uint8_t *dst;
variant = state->variant;
/* render the whole 64x64 tile in 4x4 chunks */
- for (y = 0; y < TILE_SIZE; y += 4){
- for (x = 0; x < TILE_SIZE; x += 4) {
+ for (y = 0; y < task->height; y += 4){
+ for (x = 0; x < task->width; x += 4) {
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
uint8_t *depth = NULL;
assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
- /* run shader on 4x4 block */
- BEGIN_JIT_CALL(state, task);
- variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
- x, y,
- inputs->frontfacing,
- GET_A0(inputs),
- GET_DADX(inputs),
- GET_DADY(inputs),
- color,
- depth,
- mask,
- &task->thread_data,
- stride,
- depth_stride);
- END_JIT_CALL();
+ /*
+ * The rasterizer may produce fragments outside our allocated
+ * 4x4 blocks, so we need to filter them out here.
+ */
+ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+ /* run shader on 4x4 block */
+ BEGIN_JIT_CALL(state, task);
+ variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+ x, y,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
+ color,
+ depth,
+ mask,
+ &task->thread_data,
+ stride,
+ depth_stride);
+ END_JIT_CALL();
+ }
}
#define FIXED_ORDER 4
#define FIXED_ONE (1<<FIXED_ORDER)
+/* Width and height, in pixels, of the blocks the rasterizer passes to the jit fs */
+#define LP_RASTER_BLOCK_SIZE 4
+
struct lp_rasterizer_task;
struct lp_scene *scene;
unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
+ unsigned width, height; /**< width, height of current tile, in pixels */
uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
uint8_t *depth_tile;
depth_stride = scene->zsbuf.stride;
}
- /* run shader on 4x4 block */
- BEGIN_JIT_CALL(state, task);
- variant->jit_function[RAST_WHOLE]( &state->jit_context,
- x, y,
- inputs->frontfacing,
- GET_A0(inputs),
- GET_DADX(inputs),
- GET_DADY(inputs),
- color,
- depth,
- 0xffff,
- &task->thread_data,
- stride,
- depth_stride);
- END_JIT_CALL();
+ /*
+ * The rasterizer may produce fragments outside our allocated
+ * 4x4 blocks, so we need to filter them out here.
+ */
+ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+ /* run shader on 4x4 block */
+ BEGIN_JIT_CALL(state, task);
+ variant->jit_function[RAST_WHOLE]( &state->jit_context,
+ x, y,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
+ color,
+ depth,
+ 0xffff,
+ &task->thread_data,
+ stride,
+ depth_stride);
+ END_JIT_CALL();
+ }
}
void lp_rast_triangle_1( struct lp_rasterizer_task *,
scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
+ scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE);
+ scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE);
assert(scene->tiles_x <= TILES_X);
assert(scene->tiles_y <= TILES_Y);
/** list of resources referenced by the scene commands */
struct resource_ref *resources;
+ /** scene width, height aligned to LP_RASTER_BLOCK_SIZE */
+ unsigned width_aligned;
+ unsigned height_aligned;
+
/** Total memory used by the scene (in bytes). This sums all the
* data blocks and counts all bins, state, resource references and
* other random allocations within the scene.
assert(last_level <= res->last_level);
/*
- * The complexity here is only necessary for depth textures which
- * still are tiled.
+ * The complexity here should no longer be necessary.
*/
mip_ptr = llvmpipe_get_texture_image_all(lp_tex, first_level,
LP_TEX_USAGE_READ);
#include "lp_texture.h"
#include "lp_setup.h"
#include "lp_state.h"
+#include "lp_rast.h"
#include "state_tracker/sw_winsys.h"
{
unsigned alignment, nblocksx, nblocksy, block_size;
- /* For non-compressed formats we need to align the texture size
- * to the tile size to facilitate render-to-texture.
- * XXX this blows up 1d/1d array textures by unreasonable
- * amount (factor 64), probably should do something about it.
+ /* For non-compressed formats we need 4x4 pixel alignment
+ * (for now). We also want the row stride aligned to the cache
+ * line size, otherwise the same cache line could end up being
+ * accessed by multiple threads.
+ * XXX this blows up 1d/1d array textures by a factor of 4.
*/
if (util_format_is_compressed(pt->format))
alignment = 1;
else
- alignment = TILE_SIZE;
+ alignment = LP_RASTER_BLOCK_SIZE;
nblocksx = util_format_get_nblocksx(pt->format,
align(width, alignment));
align(height, alignment));
block_size = util_format_get_blocksize(pt->format);
- lpr->row_stride[level] = align(nblocksx * block_size, 16);
+ if (util_format_is_compressed(pt->format))
+ lpr->row_stride[level] = nblocksx * block_size;
+ else
+ lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline);
/* if row_stride * height > LP_MAX_TEXTURE_SIZE */
if (lpr->row_stride[level] > LP_MAX_TEXTURE_SIZE / nblocksy) {
assert(templat->height0 == 1);
assert(templat->depth0 == 1);
assert(templat->last_level == 0);
- lpr->data = align_malloc(bytes, 16);
+ /*
+ * Reserve some extra storage: when rendering to a buffer we always
+ * read/write LP_RASTER_BLOCK_SIZE pixels at a time, but the element
+ * offset doesn't need to be aligned to LP_RASTER_BLOCK_SIZE.
+ */
+ lpr->data = align_malloc(bytes + (LP_RASTER_BLOCK_SIZE - 1) * 4 * sizeof(float), 16);
/*
* buffers don't really have stride but it's probably safer
* (for code doing same calculations for buffers and textures)
struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen);
struct sw_winsys *winsys = screen->winsys;
unsigned dt_usage;
- uint8_t *map2;
if (tex_usage == LP_TEX_USAGE_READ) {
dt_usage = PIPE_TRANSFER_READ;
/* install this linear image in texture data structure */
lpr->linear_img.data = map;
- /* make sure tiled data gets converted to linear data */
- map2 = llvmpipe_get_texture_image(lpr, 0, 0, tex_usage);
- return map2;
+ return map;
}
else if (llvmpipe_resource_is_texture(resource)) {
- map = llvmpipe_get_texture_image(lpr, layer, level,
- tex_usage);
+ map = llvmpipe_get_texture_image(lpr, layer, level, tex_usage);
return map;
}
else {