From: José Fonseca Date: Fri, 8 Jan 2010 15:42:57 +0000 (+0000) Subject: Merge remote branch 'origin/master' into lp-binning X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=080c40ab32b2abd6d8381b4a0cc143d36a1652b2;p=mesa.git Merge remote branch 'origin/master' into lp-binning Conflicts: src/gallium/auxiliary/util/u_surface.c src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/llvmpipe/SConscript src/gallium/drivers/llvmpipe/lp_bld_arit.c src/gallium/drivers/llvmpipe/lp_bld_flow.c src/gallium/drivers/llvmpipe/lp_bld_interp.c src/gallium/drivers/llvmpipe/lp_clear.c src/gallium/drivers/llvmpipe/lp_context.c src/gallium/drivers/llvmpipe/lp_context.h src/gallium/drivers/llvmpipe/lp_draw_arrays.c src/gallium/drivers/llvmpipe/lp_jit.c src/gallium/drivers/llvmpipe/lp_jit.h src/gallium/drivers/llvmpipe/lp_prim_vbuf.c src/gallium/drivers/llvmpipe/lp_setup.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_state.h src/gallium/drivers/llvmpipe/lp_state_blend.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.c src/gallium/drivers/llvmpipe/lp_state_sampler.c src/gallium/drivers/llvmpipe/lp_state_surface.c src/gallium/drivers/llvmpipe/lp_tex_cache.c src/gallium/drivers/llvmpipe/lp_tex_cache.h src/gallium/drivers/llvmpipe/lp_tex_sample.h src/gallium/drivers/llvmpipe/lp_tile_cache.c --- 080c40ab32b2abd6d8381b4a0cc143d36a1652b2 diff --cc progs/demos/gloss.c index d32e8f8c68b,578736b4e27..450861e5778 --- a/progs/demos/gloss.c +++ b/progs/demos/gloss.c @@@ -441,10 -436,10 +441,10 @@@ static void Init( int argc, char *argv[ int main( int argc, char *argv[] ) { - glutInit( &argc, argv ); glutInitWindowSize(WinWidth, WinHeight); + glutInit( &argc, argv ); glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH ); - glutCreateWindow(argv[0] ); + Win = glutCreateWindow(argv[0] ); glewInit(); glutReshapeFunc( Reshape ); glutKeyboardFunc( Key ); diff --cc src/gallium/auxiliary/util/u_surface.c index cfdf7ab8f8a,35c49782043..f66376ad750 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@@ -36,7 -36,7 +36,8 @@@ #include "pipe/p_state.h" #include "pipe/p_defines.h" +#include "util/u_memory.h" + #include "util/u_format.h" #include "util/u_surface.h" diff --cc src/gallium/drivers/llvmpipe/Makefile index 6ec97046e15,7c6e46006b9..264999a7cea --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@@ -54,10 -49,15 +57,13 @@@ C_SOURCES = lp_state_vertex.c \ lp_state_vs.c \ lp_surface.c \ - lp_tex_cache.c \ lp_tex_sample_llvm.c \ lp_texture.c \ - lp_tile_cache.c \ lp_tile_soa.c + CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv diff --cc src/gallium/drivers/llvmpipe/SConscript index ae4303bd24f,6bb545a501f..5af77c4a12d --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@@ -33,9 -36,12 +36,12 @@@ llvmpipe = env.ConvenienceLibrary 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_logic.c', + 'lp_bld_misc.cpp', + 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', - 'lp_bld_logic.c', 'lp_bld_swizzle.c', 'lp_bld_tgsi_soa.c', 'lp_bld_type.c', diff --cc src/gallium/drivers/llvmpipe/lp_bld_interp.c index affeeca6ff9,49dab8ab61e..daedf40d558 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@@ -142,39 -108,30 +142,13 @@@ coeffs_init(struct lp_build_interp_soa_ } - /** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ - static LLVMValueRef - coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) - { - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } - } - - /** - * Multiply the dadx and dady with the xstep and ystep respectively. + * Emit LLVM code to compute the fragment shader input attribute values. + * For example, for a color input, we'll compute red, green, blue and alpha + * values for the four pixels in a quad. + * Recall that we're operating on 4-element vectors so each arithmetic + * operation is operating on the four pixels in a quad. */ -static void -coeffs_update(struct lp_build_interp_soa_context *bld) -{ - unsigned attrib; - unsigned chan; - - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { - unsigned mask = bld->mask[attrib]; - unsigned mode = bld->mode[attrib]; - if (mode != TGSI_INTERPOLATE_CONSTANT) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); - } - } - } - } -} - - static void attribs_init(struct lp_build_interp_soa_context *bld) { diff --cc src/gallium/drivers/llvmpipe/lp_bld_type.c index e8cf7256c0e,1320a267214..8270cd057f6 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@@ -157,27 -157,6 +157,27 @@@ lp_build_int_vec_type(struct lp_type ty } +/** + * Build int32[4] vector type + */ +LLVMTypeRef - lp_build_int32_vec4_type() ++lp_build_int32_vec4_type(void) +{ + struct lp_type t; + LLVMTypeRef type; + + memset(&t, 0, sizeof(t)); + t.floating = FALSE; /* floating point values */ + t.sign = TRUE; /* values are signed */ + t.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + t.width = 32; /* 32-bit int */ + t.length = 4; /* 4 elements per vector */ + + type = lp_build_int_elem_type(t); + return LLVMVectorType(type, t.length); +} + + struct lp_type lp_int_type(struct lp_type type) { diff --cc src/gallium/drivers/llvmpipe/lp_context.c index 696a9d5f6a8,1cc3c9227cc..8d965175f8c --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@@ -68,6 -117,12 +69,10 @@@ static void llvmpipe_destroy( struct pi pipe_texture_reference(&llvmpipe->texture[i], NULL); } + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]); + pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL); + } + for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL); diff --cc src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 91fcbc01c6d,c152b4413fc..3989cce7445 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@@ -108,7 -112,16 +108,12 @@@ llvmpipe_draw_range_elements(struct pip draw_set_mapped_element_buffer(draw, 0, NULL); } - return TRUE; + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); - - /* Note: leave drawing surfaces mapped */ - - lp->dirty_render_cache = TRUE; } diff --cc src/gallium/drivers/llvmpipe/lp_fence.c index 14fbea6d993,00000000000..97c46087da0 mode 100644,000000..100644 --- a/src/gallium/drivers/llvmpipe/lp_fence.c +++ b/src/gallium/drivers/llvmpipe/lp_fence.c @@@ -1,109 -1,0 +1,109 @@@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "lp_fence.h" + + +struct lp_fence * +lp_fence_create(unsigned rank) +{ + struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + + pipe_reference_init(&fence->reference, 1); + + pipe_mutex_init(fence->mutex); + pipe_condvar_init(fence->signalled); + + fence->rank = rank; + + return fence; +} + + +static void +lp_fence_destroy(struct lp_fence *fence) +{ + pipe_mutex_destroy(fence->mutex); + pipe_condvar_destroy(fence->signalled); + FREE(fence); +} + + +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence *old = (struct lp_fence *) *ptr; + struct lp_fence *f = (struct lp_fence *) fence; + - if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { ++ if (pipe_reference(&old->reference, &f->reference)) { + lp_fence_destroy(old); + } +} + + +static int +llvmpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct lp_fence *f = (struct lp_fence *) fence; + + return f->count == f->rank; +} + + +static int +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + unsigned flag) +{ + struct lp_fence *fence = (struct lp_fence *) fence_handle; + + pipe_mutex_lock(fence->mutex); + while (fence->count < fence->rank) { + pipe_condvar_wait(fence->signalled, fence->mutex); + } + pipe_mutex_unlock(fence->mutex); + + return 0; +} + + + + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen) +{ + screen->fence_reference = llvmpipe_fence_reference; + screen->fence_signalled = llvmpipe_fence_signalled; + screen->fence_finish = llvmpipe_fence_finish; +} diff --cc src/gallium/drivers/llvmpipe/lp_jit.h index e8fb7d990f8,277b690c02c..1a6e939aa24 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@@ -114,17 -108,10 +108,16 @@@ typedef voi const void *a0, const void *dadx, const void *dady, - uint32_t *mask, void *color, - void *depth); + void *depth, + const int32_t c1, + const int32_t c2, + const int32_t c3, + const int32_t *step1, + const int32_t *step2, + const int32_t *step3); + - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --cc src/gallium/drivers/llvmpipe/lp_rast.c index 6772ff332ba,00000000000..6535e693089 mode 100644,000000..100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@@ -1,793 -1,0 +1,793 @@@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" +#include "util/u_surface.h" + +#include "lp_scene_queue.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_rast.h" +#include "lp_rast_priv.h" +#include "lp_tile_soa.h" +#include "lp_bld_debug.h" +#include "lp_scene.h" + + +/** + * Begin the rasterization phase. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ +static boolean +lp_rast_begin( struct lp_rasterizer *rast, + const struct pipe_framebuffer_state *fb, + boolean write_color, + boolean write_zstencil ) +{ + struct pipe_screen *screen = rast->screen; + struct pipe_surface *cbuf, *zsbuf; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + util_copy_framebuffer_state(&rast->state.fb, fb); + + rast->state.write_zstencil = write_zstencil; + rast->state.write_color = write_color; + + rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || + fb->height % TILE_SIZE != 0); + + /* XXX support multiple color buffers here */ + cbuf = rast->state.fb.cbufs[0]; + if (cbuf) { + rast->cbuf_transfer = screen->get_tex_transfer(rast->screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + fb->width, fb->height); + if (!rast->cbuf_transfer) + return FALSE; + + rast->cbuf_map = screen->transfer_map(rast->screen, + rast->cbuf_transfer); + if (!rast->cbuf_map) + return FALSE; + } + + zsbuf = rast->state.fb.zsbuf; + if (zsbuf) { + rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + fb->width, fb->height); + if (!rast->zsbuf_transfer) + return FALSE; + + rast->zsbuf_map = screen->transfer_map(rast->screen, + rast->zsbuf_transfer); + if (!rast->zsbuf_map) + return FALSE; + } + + return TRUE; +} + + +/** + * Finish the rasterization phase. + * Unmap framebuffer surfaces. + */ +static void +lp_rast_end( struct lp_rasterizer *rast ) +{ + struct pipe_screen *screen = rast->screen; + + if (rast->cbuf_map) + screen->transfer_unmap(screen, rast->cbuf_transfer); + + if (rast->zsbuf_map) + screen->transfer_unmap(screen, rast->zsbuf_transfer); + + if (rast->cbuf_transfer) + screen->tex_transfer_destroy(rast->cbuf_transfer); + + if (rast->zsbuf_transfer) + screen->tex_transfer_destroy(rast->zsbuf_transfer); + + rast->cbuf_transfer = NULL; + rast->zsbuf_transfer = NULL; + rast->cbuf_map = NULL; + rast->zsbuf_map = NULL; +} + + +/** + * Begining rasterization of a tile. + * \param x window X position of the tile, in pixels + * \param y window Y position of the tile, in pixels + */ +static void +lp_rast_start_tile( struct lp_rasterizer *rast, + unsigned thread_index, + unsigned x, unsigned y ) +{ + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); + + rast->tasks[thread_index].x = x; + rast->tasks[thread_index].y = y; +} + + +/** + * Clear the rasterizer's current color tile. + * This is a bin command called during bin processing. + */ +void lp_rast_clear_color( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + const uint8_t *clear_color = arg.clear_color; + uint8_t *color_tile = rast->tasks[thread_index].tile.color; + + LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, + clear_color[0], + clear_color[1], + clear_color[2], + clear_color[3]); + + if (clear_color[0] == clear_color[1] && + clear_color[1] == clear_color[2] && + clear_color[2] == clear_color[3]) { + memset(color_tile, clear_color[0], TILE_SIZE * TILE_SIZE * 4); + } + else { + unsigned x, y, chan; + for (y = 0; y < TILE_SIZE; y++) + for (x = 0; x < TILE_SIZE; x++) + for (chan = 0; chan < 4; ++chan) + TILE_PIXEL(color_tile, x, y, chan) = clear_color[chan]; + } +} + + +/** + * Clear the rasterizer's current z/stencil tile. + * This is a bin command called during bin processing. + */ +void lp_rast_clear_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg) +{ + unsigned i, j; + uint32_t *depth_tile = rast->tasks[thread_index].tile.depth; + + LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); + + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + depth_tile[i*TILE_SIZE + j] = arg.clear_zstencil; +} + + +/** + * Load tile color from the framebuffer surface. + * This is a bin command called during bin processing. + */ +void lp_rast_load_color( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg) +{ + struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + const unsigned x = task->x; + const unsigned y = task->y; + int w = TILE_SIZE; + int h = TILE_SIZE; + + LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + - lp_tile_read_4ub(rast->cbuf_transfer->format, ++ lp_tile_read_4ub(rast->cbuf_transfer->texture->format, + rast->tasks[thread_index].tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + w, h); +} + + +/** + * Load tile z/stencil from the framebuffer surface. + * This is a bin command called during bin processing. + */ +void lp_rast_load_zstencil( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + /* call u_tile func to load depth (and stencil?) from surface */ +} + + +void lp_rast_set_state( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + const struct lp_rast_state *state = arg.set_state; + + LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); + + /* just set the current state pointer for this rasterizer */ + rast->tasks[thread_index].current_state = state; +} + + + +/* Within a tile: + */ + +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle. + * This is a bin command called during bin processing. + */ +void lp_rast_shade_tile( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + /* Set c1,c2,c3 to large values so the in/out test always passes */ + const int32_t c1 = INT_MIN, c2 = INT_MIN, c3 = INT_MIN; + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const unsigned tile_x = rast->tasks[thread_index].x; + const unsigned tile_y = rast->tasks[thread_index].y; + unsigned x, y; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + /* Use the existing preference for 4x4 (four quads) shading: + */ + for (y = 0; y < TILE_SIZE; y += 4) + for (x = 0; x < TILE_SIZE; x += 4) + lp_rast_shade_quads( rast, + thread_index, + inputs, + tile_x + x, + tile_y + y, + c1, c2, c3); +} + + +/** + * Compute shading for a 4x4 block of pixels. + * This is a bin command called during bin processing. + */ +void lp_rast_shade_quads( struct lp_rasterizer *rast, + unsigned thread_index, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + int32_t c1, int32_t c2, int32_t c3) +{ + const struct lp_rast_state *state = rast->tasks[thread_index].current_state; + struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + void *color; + void *depth; + unsigned ix, iy; + int block_offset; + +#ifdef DEBUG + assert(state); + + /* Sanity checks */ + assert(x % TILE_VECTOR_WIDTH == 0); + assert(y % TILE_VECTOR_HEIGHT == 0); + + assert((x % 4) == 0); + assert((y % 4) == 0); +#endif + + ix = x % TILE_SIZE; + iy = y % TILE_SIZE; + + /* offset of the 16x16 pixel block within the tile */ + block_offset = ((iy/4)*(16*16) + (ix/4)*16); + + /* color buffer */ + color = tile->color + 4 * block_offset; + + /* depth buffer */ + depth = tile->depth + block_offset; + +#ifdef DEBUG + assert(lp_check_alignment(depth, 16)); + assert(lp_check_alignment(color, 16)); + assert(lp_check_alignment(state->jit_context.blend_color, 16)); + + assert(lp_check_alignment(inputs->step[0], 16)); + assert(lp_check_alignment(inputs->step[1], 16)); + assert(lp_check_alignment(inputs->step[2], 16)); +#endif + + /* run shader */ + state->jit_function( &state->jit_context, + x, y, + inputs->a0, + inputs->dadx, + inputs->dady, + color, + depth, + c1, c2, c3, + inputs->step[0], inputs->step[1], inputs->step[2] + ); +} + + +/* End of tile: + */ + + +/** + * Write the rasterizer's color tile to the framebuffer. + */ +static void lp_rast_store_color( struct lp_rasterizer *rast, + unsigned thread_index) +{ + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; + int w = TILE_SIZE; + int h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + assert(w >= 0); + assert(h >= 0); + assert(w <= TILE_SIZE); + assert(h <= TILE_SIZE); + + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, + thread_index, x, y, w, h); + - lp_tile_write_4ub(rast->cbuf_transfer->format, ++ lp_tile_write_4ub(rast->cbuf_transfer->texture->format, + rast->tasks[thread_index].tile.color, + rast->cbuf_map, + rast->cbuf_transfer->stride, + x, y, + w, h); +} + + +static void +lp_tile_write_z32(const uint32_t *src, uint8_t *dst, unsigned dst_stride, + unsigned x0, unsigned y0, unsigned w, unsigned h) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < h; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < w; ++x) { + *dst_pixel++ = *src++; + } + dst_row += dst_stride; + } +} + +/** + * Write the rasterizer's z/stencil tile to the framebuffer. + */ +static void lp_rast_store_zstencil( struct lp_rasterizer *rast, + unsigned thread_index ) +{ + const unsigned x = rast->tasks[thread_index].x; + const unsigned y = rast->tasks[thread_index].y; + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (x + w > rast->state.fb.width) + w -= x + w - rast->state.fb.width; + + if (y + h > rast->state.fb.height) + h -= y + h - rast->state.fb.height; + + LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + - assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM); ++ assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM); + lp_tile_write_z32(rast->tasks[thread_index].tile.depth, + rast->zsbuf_map, + rast->zsbuf_transfer->stride, + x, y, w, h); +} + + +/** + * Write the rasterizer's tiles to the framebuffer. + */ +static void +lp_rast_end_tile( struct lp_rasterizer *rast, + unsigned thread_index ) +{ + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + if (rast->state.write_color) + lp_rast_store_color(rast, thread_index); + + if (rast->state.write_zstencil) + lp_rast_store_zstencil(rast, thread_index); +} + + +/** + * Signal on a fence. This is called during bin execution/rasterization. + * Called per thread. + */ +void lp_rast_fence( struct lp_rasterizer *rast, + unsigned thread_index, + const union lp_rast_cmd_arg arg ) +{ + struct lp_fence *fence = arg.fence; + + pipe_mutex_lock( fence->mutex ); + + fence->count++; + assert(fence->count <= fence->rank); + + LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + pipe_condvar_signal( fence->signalled ); + + pipe_mutex_unlock( fence->mutex ); +} + + +/** + * When all the threads are done rasterizing a scene, one thread will + * call this function to reset the scene and put it onto the empty queue. + */ +static void +release_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ) +{ + util_unreference_framebuffer_state( &scene->fb ); + + lp_scene_reset( scene ); + lp_scene_enqueue( rast->empty_scenes, scene ); + rast->curr_scene = NULL; +} + + +/** + * Rasterize commands for a single bin. + * \param x, y position of the bin's tile in the framebuffer + * Must be called between lp_rast_begin() and lp_rast_end(). + * Called per thread. + */ +static void +rasterize_bin( struct lp_rasterizer *rast, + unsigned thread_index, + const struct cmd_bin *bin, + int x, int y) +{ + const struct cmd_block_list *commands = &bin->commands; + struct cmd_block *block; + unsigned k; + + lp_rast_start_tile( rast, thread_index, x, y ); + + /* simply execute each of the commands in the block list */ + for (block = commands->head; block; block = block->next) { + for (k = 0; k < block->count; k++) { + block->cmd[k]( rast, thread_index, block->arg[k] ); + } + } + + lp_rast_end_tile( rast, thread_index ); +} + + +/** + * Rasterize/execute all bins within a scene. + * Called per thread. + */ +static void +rasterize_scene( struct lp_rasterizer *rast, + unsigned thread_index, + struct lp_scene *scene, + bool write_depth ) +{ + /* loop over scene bins, rasterize each */ +#if 0 + { + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_get_bin(scene, i, j); + rasterize_bin( rast, thread_index, + bin, i * TILE_SIZE, j * TILE_SIZE ); + } + } + } +#else + { + struct cmd_bin *bin; + int x, y; + + assert(scene); + while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { + rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + } + } +#endif +} + + +/** + * Called by setup module when it has something for us to render. + */ +void +lp_rasterize_scene( struct lp_rasterizer *rast, + struct lp_scene *scene, + const struct pipe_framebuffer_state *fb, + bool write_depth ) +{ + boolean debug = false; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + if (debug) { + unsigned x, y; + printf("rasterize scene:\n"); + printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + printf(" bin %u, %u size: %u\n", x, y, + lp_scene_bin_size(scene, x, y)); + } + } + } + + /* save framebuffer state in the bin */ + util_copy_framebuffer_state(&scene->fb, fb); + scene->write_depth = write_depth; + + if (rast->num_threads == 0) { + /* no threading */ + + lp_rast_begin( rast, fb, + fb->cbufs[0]!= NULL, + fb->zsbuf != NULL && write_depth ); + + lp_scene_bin_iter_begin( scene ); + rasterize_scene( rast, 0, scene, write_depth ); + + release_scene( rast, scene ); + + lp_rast_end( rast ); + } + else { + /* threaded rendering! */ + unsigned i; + + lp_scene_enqueue( rast->full_scenes, scene ); + + /* signal the threads that there's work to do */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* wait for work to complete */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_wait(&rast->tasks[i].work_done); + } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +/** + * This is the thread's main entrypoint. + * It's a simple loop: + * 1. wait for work + * 2. do work + * 3. signal that we're done + */ +static void * +thread_func( void *init_data ) +{ + struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; + struct lp_rasterizer *rast = task->rast; + boolean debug = false; + + while (1) { + /* wait for work */ + if (debug) + debug_printf("thread %d waiting for work\n", task->thread_index); + pipe_semaphore_wait(&task->work_ready); + + if (task->thread_index == 0) { + /* thread[0]: + * - get next scene to rasterize + * - map the framebuffer surfaces + */ + const struct pipe_framebuffer_state *fb; + boolean write_depth; + + rast->curr_scene = lp_scene_dequeue( rast->full_scenes ); + + lp_scene_bin_iter_begin( rast->curr_scene ); + + fb = &rast->curr_scene->fb; + write_depth = rast->curr_scene->write_depth; + + lp_rast_begin( rast, fb, + fb->cbufs[0] != NULL, + fb->zsbuf != NULL && write_depth ); + } + + /* Wait for all threads to get here so that threads[1+] don't + * get a null rast->curr_scene pointer. + */ + pipe_barrier_wait( &rast->barrier ); + + /* do work */ + if (debug) + debug_printf("thread %d doing work\n", task->thread_index); + rasterize_scene(rast, + task->thread_index, + rast->curr_scene, + rast->curr_scene->write_depth); + + /* wait for all threads to finish with this scene */ + pipe_barrier_wait( &rast->barrier ); + + if (task->thread_index == 0) { + /* thread[0]: + * - release the scene object + * - unmap the framebuffer surfaces + */ + release_scene( rast, rast->curr_scene ); + lp_rast_end( rast ); + } + + /* signal done with work */ + if (debug) + debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); + } + + return NULL; +} + + +/** + * Initialize semaphores and spawn the threads. + */ +static void +create_rast_threads(struct lp_rasterizer *rast) +{ + unsigned i; + + rast->num_threads = util_cpu_caps.nr_cpus; + rast->num_threads = debug_get_num_option("LP_NUM_THREADS", rast->num_threads); + rast->num_threads = MIN2(rast->num_threads, MAX_THREADS); + + /* NOTE: if num_threads is zero, we won't use any threads */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_init(&rast->tasks[i].work_ready, 0); + pipe_semaphore_init(&rast->tasks[i].work_done, 0); + rast->threads[i] = pipe_thread_create(thread_func, + (void *) &rast->tasks[i]); + } +} + + + +/** + * Create new lp_rasterizer. + * \param empty the queue to put empty scenes on after we've finished + * processing them. + */ +struct lp_rasterizer * +lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +{ + struct lp_rasterizer *rast; + unsigned i; + + rast = CALLOC_STRUCT(lp_rasterizer); + if(!rast) + return NULL; + + rast->screen = screen; + + rast->empty_scenes = empty; + rast->full_scenes = lp_scene_queue_create(); + + for (i = 0; i < Elements(rast->tasks); i++) { + rast->tasks[i].tile.color = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].tile.depth = align_malloc( TILE_SIZE*TILE_SIZE*4, 16 ); + rast->tasks[i].rast = rast; + rast->tasks[i].thread_index = i; + } + + create_rast_threads(rast); + + /* for synchronizing rasterization threads */ + pipe_barrier_init( &rast->barrier, rast->num_threads ); + + return rast; +} + + +/* Shutdown: + */ +void lp_rast_destroy( struct lp_rasterizer *rast ) +{ + unsigned i; + + util_unreference_framebuffer_state(&rast->state.fb); + + for (i = 0; i < Elements(rast->tasks); i++) { + align_free(rast->tasks[i].tile.depth); + align_free(rast->tasks[i].tile.color); + } + + /* for synchronizing rasterization threads */ + pipe_barrier_destroy( &rast->barrier ); + + FREE(rast); +} + + +/** Return number of rasterization threads */ +unsigned +lp_rast_get_num_threads( struct lp_rasterizer *rast ) +{ + return rast->num_threads; +} diff --cc src/gallium/drivers/llvmpipe/lp_setup.c index 1eb944a0de7,b18f17c0cd3..5cdcf4ecc98 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@@ -418,47 -1114,113 +418,47 @@@ lp_setup_set_flatshade_first( struct se } -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. - */ -void -llvmpipe_setup_line(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4]) +void +lp_setup_set_vertex_info( struct setup_context *setup, + struct vertex_info *vertex_info ) { - int x0 = (int) v0[0][0]; - int x1 = (int) v1[0][0]; - int y0 = (int) v0[0][1]; - int y1 = (int) v1[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - -#if DEBUG_VERTS - debug_printf("Setup line:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); -#endif - - if (setup->llvmpipe->no_rast) - return; + /* XXX: just silently holding onto the pointer: + */ + setup->vertex_info = vertex_info; +} - if (dx == 0 && dy == 0) - return; - if (!setup_line_coefficients(setup, v0, v1)) - return; +void +lp_setup_set_sampler_textures( struct setup_context *setup, + unsigned num, struct pipe_texture **texture) +{ + struct pipe_texture *dummy; + unsigned i; - assert(v0[0][0] < 1.0e9); - assert(v0[0][1] < 1.0e9); - assert(v1[0][0] < 1.0e9); - assert(v1[0][1] < 1.0e9); + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } + assert(num <= PIPE_MAX_SAMPLERS); - assert(dx >= 0); - assert(dy >= 0); - assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; - setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; - setup->quad[0].inout.mask = 0x0; + /* FIXME: hold on to the reference */ + dummy = NULL; + pipe_texture_reference(&dummy, tex); - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. - */ - setup->quad[0].input.coverage[0] = - setup->quad[0].input.coverage[1] = - setup->quad[0].input.coverage[2] = - setup->quad[0].input.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } + if(tex) { + struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex); + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; - jit_tex->width = tex->width[0]; - jit_tex->height = tex->height[0]; ++ jit_tex->width = tex->width0; ++ jit_tex->height = tex->height0; + jit_tex->stride = lp_tex->stride[0]; + if(!lp_tex->dt) + jit_tex->data = lp_tex->data; + else + /* FIXME: map the rendertarget */ + assert(0); } } diff --cc src/gallium/drivers/llvmpipe/lp_setup_tri.c index e15b987767c,00000000000..fe34903cf32 mode 100644,000000..100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@@ -1,550 -1,0 +1,547 @@@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for triangles + */ + +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define NUM_CHANNELS 4 + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_rast_triangle *tri, + unsigned slot, + const float value, + unsigned i ) +{ + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0; + tri->inputs.dady[slot][i] = 0; +} + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[slot][i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coef(struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4]) +{ + /*X*/ + tri->inputs.a0[slot][0] = 0.0; + tri->inputs.dadx[slot][0] = 1.0; + tri->inputs.dady[slot][0] = 0.0; + /*Y*/ + tri->inputs.a0[slot][1] = 0.0; + tri->inputs.dadx[slot][1] = 0.0; + tri->inputs.dady[slot][1] = 1.0; + /*Z*/ + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); + /*W*/ + linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); +} + + +static void setup_facing_coef( struct lp_rast_triangle *tri, + unsigned slot, + boolean frontface ) +{ + constant_coef( tri, slot, 1.0f - frontface, 0 ); + constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ +} + + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. + */ +static void setup_tri_coefficients( struct setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface) +{ + struct lp_scene *scene = lp_setup_get_current_scene(setup); + unsigned slot; + + /* Allocate space for the a0, dadx and dady arrays + */ + { + unsigned bytes; + bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); + tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); + tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); + } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + for (i = 0; i < NUM_CHANNELS; i++) + constant_coef(tri, slot+1, v3[vert_attr][i], i); + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + break; + + case LP_INTERP_POSITION: + /* XXX: fix me - duplicates the values in slot zero. + */ + setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); + break; + + case LP_INTERP_FACING: + setup_facing_coef(tri, slot+1, frontface); + break; + + default: + assert(0); + } + } +} + + + +static inline int subpixel_snap( float a ) +{ + return util_iround(FIXED_ONE * a); +} + + - #define MIN3(a,b,c) MIN2(MIN2(a,b),c) - #define MAX3(a,b,c) MAX2(MAX2(a,b),c) - +/** + * Do basic setup for triangle rasterization and determine which + * framebuffer tiles are touched. Put the triangle in the scene's + * bins for the tiles which we overlap. + */ +static void +do_triangle_ccw(struct setup_context *setup, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontfacing ) +{ + /* x/y positions in fixed point */ + const int x1 = subpixel_snap(v1[0][0]); + const int x2 = subpixel_snap(v2[0][0]); + const int x3 = subpixel_snap(v3[0][0]); + const int y1 = subpixel_snap(v1[0][1]); + const int y2 = subpixel_snap(v2[0][1]); + const int y3 = subpixel_snap(v3[0][1]); + + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); + float area, oneoverarea; + int minx, maxx, miny, maxy; + + tri->dx12 = x1 - x2; + tri->dx23 = x2 - x3; + tri->dx31 = x3 - x1; + + tri->dy12 = y1 - y2; + tri->dy23 = y2 - y3; + tri->dy31 = y3 - y1; + + area = (tri->dx12 * tri->dy31 - + tri->dx31 * tri->dy12); + + /* Cull non-ccw and zero-sized triangles. + * + * XXX: subject to overflow?? + */ + if (area <= 0) { + lp_scene_putback_data( scene, sizeof *tri ); + return; + } + + /* Bounding rectangle (in pixels) */ + tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER; + tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER; + + if (tri->miny == tri->maxy || + tri->minx == tri->maxx) { + lp_scene_putback_data( scene, sizeof *tri ); + return; + } + + /* + */ + oneoverarea = ((float)FIXED_ONE) / (float)area; + + /* Setup parameter interpolants: + */ + setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); + + /* half-edge constants, will be interated over the whole + * rendertarget. + */ + tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; + tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; + tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; + + /* correct for top-left fill convention: + */ + if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; + if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; + if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; + + tri->dy12 *= FIXED_ONE; + tri->dy23 *= FIXED_ONE; + tri->dy31 *= FIXED_ONE; + + tri->dx12 *= FIXED_ONE; + tri->dx23 *= FIXED_ONE; + tri->dx31 *= FIXED_ONE; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + tri->eo1 = 0; + if (tri->dy12 < 0) tri->eo1 -= tri->dy12; + if (tri->dx12 > 0) tri->eo1 += tri->dx12; + + tri->eo2 = 0; + if (tri->dy23 < 0) tri->eo2 -= tri->dy23; + if (tri->dx23 > 0) tri->eo2 += tri->dx23; + + tri->eo3 = 0; + if (tri->dy31 < 0) tri->eo3 -= tri->dy31; + if (tri->dx31 > 0) tri->eo3 += tri->dx31; + + /* Calculate trivial accept offsets from the above. + */ + tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; + tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; + tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; + + { + const int xstep1 = -tri->dy12; + const int xstep2 = -tri->dy23; + const int xstep3 = -tri->dy31; + + const int ystep1 = tri->dx12; + const int ystep2 = tri->dx23; + const int ystep3 = tri->dx31; + + int qx, qy, ix, iy; + int i = 0; + + for (qy = 0; qy < 2; qy++) { + for (qx = 0; qx < 2; qx++) { + for (iy = 0; iy < 2; iy++) { + for (ix = 0; ix < 2; ix++, i++) { + int x = qx * 2 + ix; + int y = qy * 2 + iy; + tri->inputs.step[0][i] = x * xstep1 + y * ystep1; + tri->inputs.step[1][i] = x * xstep2 + y * ystep2; + tri->inputs.step[2][i] = x * xstep3 + y * ystep3; + } + } + } + } + } + + /* + * All fields of 'tri' are now set. The remaining code here is + * concerned with binning. + */ + + /* Convert to tile coordinates: + */ + minx = tri->minx / TILE_SIZE; + miny = tri->miny / TILE_SIZE; + maxx = tri->maxx / TILE_SIZE; + maxy = tri->maxy / TILE_SIZE; + + /* Determine which tile(s) intersect the triangle's bounding box + */ + if (miny == maxy && minx == maxx) + { + /* Triangle is contained in a single tile: + */ + lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, + lp_rast_arg_triangle(tri) ); + } + else + { + int c1 = (tri->c1 + + tri->dx12 * miny * TILE_SIZE - + tri->dy12 * minx * TILE_SIZE); + int c2 = (tri->c2 + + tri->dx23 * miny * TILE_SIZE - + tri->dy23 * minx * TILE_SIZE); + int c3 = (tri->c3 + + tri->dx31 * miny * TILE_SIZE - + tri->dy31 * minx * TILE_SIZE); + + int ei1 = tri->ei1 << TILE_ORDER; + int ei2 = tri->ei2 << TILE_ORDER; + int ei3 = tri->ei3 << TILE_ORDER; + + int eo1 = tri->eo1 << TILE_ORDER; + int eo2 = tri->eo2 << TILE_ORDER; + int eo3 = tri->eo3 << TILE_ORDER; + + int xstep1 = -(tri->dy12 << TILE_ORDER); + int xstep2 = -(tri->dy23 << TILE_ORDER); + int xstep3 = -(tri->dy31 << TILE_ORDER); + + int ystep1 = tri->dx12 << TILE_ORDER; + int ystep2 = tri->dx23 << TILE_ORDER; + int ystep3 = tri->dx31 << TILE_ORDER; + int x, y; + + + /* Trivially accept or reject blocks, else jump to per-pixel + * examination above. + */ + for (y = miny; y <= maxy; y++) + { + int cx1 = c1; + int cx2 = c2; + int cx3 = c3; + int in = 0; + + for (x = minx; x <= maxx; x++) + { + if (cx1 + eo1 < 0 || + cx2 + eo2 < 0 || + cx3 + eo3 < 0) + { + /* do nothing */ + if (in) + break; + } + else if (cx1 + ei1 > 0 && + cx2 + ei2 > 0 && + cx3 + ei3 > 0) + { + in = 1; + /* triangle covers the whole tile- shade whole tile */ + lp_scene_bin_command( scene, x, y, + lp_rast_shade_tile, + lp_rast_arg_inputs(&tri->inputs) ); + } + else + { + in = 1; + /* shade partial tile */ + lp_scene_bin_command( scene, x, y, + lp_rast_triangle, + lp_rast_arg_triangle(tri) ); + } + + /* Iterate cx values across the region: + */ + cx1 += xstep1; + cx2 += xstep2; + cx3 += xstep3; + } + + /* Iterate c values down the region: + */ + c1 += ystep1; + c2 += ystep2; + c3 += ystep3; + } + } +} + +static void triangle_cw( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); +} + +static void triangle_ccw( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); +} + +static void triangle_both( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + if (ex * fy - ey * fx < 0) + triangle_ccw( setup, v0, v1, v2 ); + else + triangle_cw( setup, v0, v1, v2 ); +} + +static void triangle_nop( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +} + + +void +lp_setup_choose_triangle( struct setup_context *setup ) +{ + switch (setup->cullmode) { + case PIPE_WINDING_NONE: + setup->triangle = triangle_both; + break; + case PIPE_WINDING_CCW: + setup->triangle = triangle_cw; + break; + case PIPE_WINDING_CW: + setup->triangle = triangle_ccw; + break; + default: + setup->triangle = triangle_nop; + break; + } +} + + diff --cc src/gallium/drivers/llvmpipe/lp_state.h index 6017dc553a6,7020da145f3..25d13536741 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@@ -54,10 -54,8 +54,9 @@@ #define LP_NEW_VERTEX 0x1000 #define LP_NEW_VS 0x2000 #define LP_NEW_QUERY 0x4000 +#define LP_NEW_BLEND_COLOR 0x8000 - struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@@ -204,10 -211,12 +212,6 @@@ llvmpipe_draw_range_elements(struct pip unsigned max_index, unsigned mode, unsigned start, unsigned count); --void - llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); -llvmpipe_map_transfers(struct llvmpipe_context *lp); -- -void -llvmpipe_unmap_transfers(struct llvmpipe_context *lp); -- void llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp); diff --cc src/gallium/drivers/llvmpipe/lp_state_blend.c index 48afe5f5242,a94cd05ef20..a10c5918df3 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@@ -67,16 -73,22 +73,21 @@@ void llvmpipe_set_blend_color( struct p const struct pipe_blend_color *blend_color ) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - unsigned i, j; + + if(!blend_color) + return; + + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); - if(!llvmpipe->jit_context.blend_color) - llvmpipe->jit_context.blend_color = align_malloc(4 * 16, 16); - for (i = 0; i < 4; ++i) { - uint8_t c = float_to_ubyte(blend_color->color[i]); - for (j = 0; j < 16; ++j) - llvmpipe->jit_context.blend_color[i*16 + j] = c; - } + llvmpipe->dirty |= LP_NEW_BLEND_COLOR; } @@@ -98,8 -110,16 +109,13 @@@ llvmpipe_bind_depth_stencil_state(struc { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; - if(llvmpipe->depth_stencil) - llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; - llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; } diff --cc src/gallium/drivers/llvmpipe/lp_state_derived.c index cc7b09fd4d1,6c1ef6bc42d..78d046985b9 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@@ -43,95 -54,85 +43,95 @@@ * (simple float[][4]) used by the 'draw' module into vertices for * rasterization. * - * This function validates the vertex layout and returns a pointer to a - * vertex_info object. + * This function validates the vertex layout. */ -struct vertex_info * -llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) +static void +compute_vertex_info(struct llvmpipe_context *llvmpipe) { + const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - const uint num = draw_num_vs_outputs(llvmpipe->draw); ++ const uint num = draw_num_shader_outputs(llvmpipe->draw); + uint i; - if (vinfo->num_attribs == 0) { - /* compute vertex layout now */ - const struct lp_fragment_shader *lpfs = llvmpipe->fs; - struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(llvmpipe->draw); - uint i; + /* Tell setup to tell the draw module to simply emit the whole + * post-xform vertex as-is. + * + * Not really sure if this is the best approach. + */ + vinfo->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo); - /* Tell draw_vbuf to simply emit the whole post-xform vertex - * as-is. No longer any need to try and emit draw vertex_header - * info. - */ - vinfo_vbuf->num_attribs = 0; - for (i = 0; i < num; i++) { - draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); - } - draw_compute_vertex_size(vinfo_vbuf); - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. - */ - vinfo->num_attribs = 0; - for (i = 0; i < lpfs->info.num_inputs; i++) { - int src; - enum interp_mode interp; + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - interp = INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - interp = INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = INTERP_PERSPECTIVE; - break; - default: - assert(0); - interp = INTERP_LINEAR; - } +/* + llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); +*/ + /* Now match FS inputs against emitted vertex data. It's also + * entirely possible to just have a fixed layout for FS input, + * determined by the fragment shader itself, and adjust the draw + * outputs to match that. + */ + { + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + + for (i = 0; i < lpfs->info.num_inputs; i++) { + + /* This can be precomputed, except for flatshade: + */ switch (lpfs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_FACE: + inputs[i].interp = LP_INTERP_FACING; + break; case TGSI_SEMANTIC_POSITION: - interp = INTERP_POS; + inputs[i].interp = LP_INTERP_POSITION; break; - case TGSI_SEMANTIC_COLOR: - if (llvmpipe->rasterizer->flatshade) { - interp = INTERP_CONSTANT; - } + /* Colors are linearly interpolated in the fragment shader + * even when flatshading is active. This just tells the + * setup module to use coefficients with ddx==0 and + * ddy==0. + */ + if (llvmpipe->rasterizer->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; break; - } - /* this includes texcoords and varying vars */ - src = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); - draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); - } + default: + switch (lpfs->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + } - llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, - TGSI_SEMANTIC_PSIZE, 0); - if (llvmpipe->psize_slot > 0) { - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, - llvmpipe->psize_slot); + /* Search for each input in current vs output: + */ + inputs[i].src_index = - draw_find_vs_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); ++ draw_find_shader_output(llvmpipe->draw, ++ lpfs->info.input_semantic_name[i], ++ lpfs->info.input_semantic_index[i]); } - draw_compute_vertex_size(vinfo); + lp_setup_set_fs_inputs(llvmpipe->setup, + inputs, + lpfs->info.num_inputs); } - - return vinfo; } diff --cc src/gallium/drivers/llvmpipe/lp_state_fs.c index 3ad58415e39,b73ca2d41ed..3a669ba859a --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@@ -801,17 -724,18 +848,17 @@@ llvmpipe_set_constant_buffer(struct pip assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(llvmpipe->constants[shader].buffer == buffer) + return; + - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); - if(shader == PIPE_SHADER_FRAGMENT) { - llvmpipe->jit_context.constants = data; - } - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --cc src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 282ed2e9ea3,aa3b5a3f91e..7d4c310aae8 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@@ -46,21 -45,14 +46,24 @@@ void llvmpipe_bind_rasterizer_state(str { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; + /* Note: we can immediately set the triangle state here and + * not worry about binning because we handle culling during + * triangle setup, not when rasterizing the bins. + */ + if (llvmpipe->rasterizer) { + lp_setup_set_triangle_state( llvmpipe->setup, + llvmpipe->rasterizer->cull_mode, + llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW ); + } + llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --cc src/gallium/drivers/llvmpipe/lp_state_sampler.c index e19394a4c92,d382f9ca87e..976f81113fd --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@@ -104,6 -144,37 +132,36 @@@ llvmpipe_set_sampler_textures(struct pi } + void + llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **textures) + { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + uint i; + + assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS); + + /* Check for no-op */ + if (num_textures == llvmpipe->num_vertex_textures && + !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) { + return; + } + + draw_flush(llvmpipe->draw); + + for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num_textures ? textures[i] : NULL; + + pipe_texture_reference(&llvmpipe->vertex_textures[i], tex); - lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex); + } + + llvmpipe->num_vertex_textures = num_textures; + + llvmpipe->dirty |= LP_NEW_TEXTURE; + } + + void llvmpipe_delete_sampler_state(struct pipe_context *pipe, void *sampler) diff --cc src/gallium/drivers/llvmpipe/lp_state_surface.c index 957e947fe02,e37ff04f3df..0e9f03b90b8 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@@ -37,9 -35,14 +37,11 @@@ #include "draw/draw_context.h" + #include "util/u_format.h" + /** - * XXX this might get moved someday * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. - * Here, we flush the old surfaces and update the tile cache to point to the new - * surfaces. */ void llvmpipe_set_framebuffer_state(struct pipe_context *pipe,