From: Keith Whitwell Date: Mon, 18 Oct 2010 02:03:42 +0000 (-0700) Subject: Merge remote branch 'origin/master' into lp-setup-llvm X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0072acd447dc6be652e63752e50215c3105322c8;p=mesa.git Merge remote branch 'origin/master' into lp-setup-llvm Conflicts: src/gallium/drivers/llvmpipe/lp_setup_coef.c src/gallium/drivers/llvmpipe/lp_setup_coef.h src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_setup_tri.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.h --- 0072acd447dc6be652e63752e50215c3105322c8 diff --cc src/gallium/drivers/llvmpipe/lp_setup.c index 3854fd70af7,e96f012f1bb..6118434d3d3 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@@ -861,17 -921,12 +919,19 @@@ lp_setup_update_state( struct lp_setup_ setup->psize = lp->psize_slot; assert(lp->dirty == 0); + + assert(lp->setup_variant.key.size == + setup->setup.variant->key.size); + + assert(memcmp(&lp->setup_variant.key, + &setup->setup.variant->key, + setup->setup.variant->key.size) == 0); } - if (update_scene) - set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ); + if (update_scene) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } /* Only call into update_scene_state() if we already have a * scene: diff --cc src/gallium/drivers/llvmpipe/lp_setup.h index 19078ebbcb4,25dab78f64f..ebb18f81344 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@@ -85,11 -107,13 +85,12 @@@ voi lp_setup_set_point_state( struct lp_setup_context *setup, float point_size, boolean point_size_per_vertex, - uint sprite); + uint sprite_coord_enable, + uint sprite_coord_origin); void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *interp, - unsigned nr ); +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant ); void lp_setup_set_fs_variant( struct lp_setup_context *setup, diff --cc src/gallium/drivers/llvmpipe/lp_setup_line.c index 928ffdc5cb5,ece8638b5a6..827413bb33e --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@@ -160,26 -160,24 +161,25 @@@ setup_fragcoord_coef( struct lp_setup_c * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; unsigned i; - switch (setup->fs.input[slot].interp) { + switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { + if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) @@@ -272,8 -275,8 +278,9 @@@ try_setup_line( struct lp_setup_contex const float (*v2)[4]) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; diff --cc src/gallium/drivers/llvmpipe/lp_setup_point.c index c98966022ee,16d21df35e2..146f1bd07ca --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@@ -33,10 -33,8 +33,9 @@@ #include "util/u_math.h" #include "util/u_memory.h" #include "lp_perf.h" - #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@@ -150,21 -203,26 +204,27 @@@ setup_point_fragcoord_coef(struct lp_se */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - enum lp_interp interp = setup->fs.input[slot].interp; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; ++ enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } - switch (key->inputs[slot].interp) { + switch (interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from diff --cc src/gallium/drivers/llvmpipe/lp_setup_tri.c index 43617a6b672,15c414d8c3b..03036cd75d2 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@@ -32,11 -32,12 +32,12 @@@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" + #include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" -#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 @@@ -225,14 -231,12 +232,13 @@@ do_triangle_ccw(struct lp_setup_contex boolean frontfacing ) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; - int x[3]; - int y[3]; - int area; + struct lp_rast_plane *plane; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@@ -305,104 -311,160 +313,173 @@@ tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; - - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; - - area = (tri->plane[0].dcdy * tri->plane[2].dcdx - - tri->plane[2].dcdy * tri->plane[0].dcdx); - LP_COUNT(nr_tris); - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return TRUE; - } - /* Setup parameter interpolants: */ - lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); + setup->setup.variant->jit_function( v0, + v1, + v2, + frontfacing, - tri->inputs.a0, - tri->inputs.dadx, - tri->inputs.dady, ++ GET_A0(&tri->inputs), ++ GET_DADX(&tri->inputs), ++ GET_DADY(&tri->inputs), + &setup->setup.variant->key ); - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; + if (0) + lp_dump_setup_coef(&setup->setup.variant->key, - (const float (*)[4])tri->inputs.a0, - (const float (*)[4])tri->inputs.dadx, - (const float (*)[4])tri->inputs.dady); - - for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; ++ (const float (*)[4])GET_A0(&tri->inputs), ++ (const float (*)[4])GET_DADX(&tri->inputs), ++ (const float (*)[4])GET_DADY(&tri->inputs)); ++ + plane = GET_PLANES(tri); + + #if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); + + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); - /* half-edge constants, will be interated over the whole render - * target. + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); + } + #else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } + #endif - /* Calculate trivial accept offsets from the above. - */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } diff --cc src/gallium/drivers/llvmpipe/lp_state_derived.c index de242aa93ca,7f68818ab47..0f5f7369e04 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@@ -73,8 -72,60 +73,8 @@@ compute_vertex_info(struct llvmpipe_con */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); - if (vs_index < 0) { - /* - * This can happen with sprite coordinates - the vertex - * shader doesn't need to provide an output as we generate - * them internally. However, lets keep pretending that there - * is something there to not confuse other code. - */ - vs_index = 0; - } - - /* This can be pre-computed, except for flatshade: - */ - inputs[i].usage_mask = lpfs->info.base.input_usage_mask[i]; - - switch (lpfs->info.base.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } - - switch (lpfs->info.base.input_semantic_name[i]) { - case TGSI_SEMANTIC_FACE: - inputs[i].interp = LP_INTERP_FACING; - break; - case TGSI_SEMANTIC_POSITION: - /* Position was already emitted above - */ - inputs[i].interp = LP_INTERP_POSITION; - inputs[i].src_index = 0; - continue; - case TGSI_SEMANTIC_COLOR: - /* Colors are linearly inputs[i].interpolated in the fragment shader - * even when flatshading is active. This just tells the - * setup module to use coefficients with ddx==0 and - * ddy==0. - */ - if (llvmpipe->rasterizer->flatshade) - inputs[i].interp = LP_INTERP_CONSTANT; - break; - - default: - break; - } /* * Emit the requested fs attribute for all but position. @@@ -92,9 -145,15 +92,8 @@@ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } - llvmpipe->num_inputs = lpfs->info.base.num_inputs; - draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo); -- - lp_setup_set_fs_inputs(llvmpipe->setup, - inputs, - lpfs->info.base.num_inputs); } diff --cc src/gallium/drivers/llvmpipe/lp_state_fs.c index ad058e384ad,7acbe7e86c3..9fbedac165f --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@@ -506,18 -544,6 +543,18 @@@ generate_fragment(struct llvmpipe_scree unsigned chan; unsigned cbuf; + /* Adjust color input interpolation according to flatshade state: + */ - memcpy(inputs, shader->inputs, shader->info.num_inputs * sizeof inputs[0]); - for (i = 0; i < shader->info.num_inputs; i++) { ++ memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); ++ for (i = 0; i < shader->info.base.num_inputs; i++) { + if (inputs[i].interp == LP_INTERP_COLOR) { + if (key->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; + } + } + /* TODO: actually pick these based on the fs and color buffer * characteristics. */ @@@ -616,8 -643,8 +653,8 @@@ * already included in the shader key. */ lp_build_interp_soa_init(&interp, - shader->info.num_inputs, - lp->num_inputs, - lp->inputs, ++ shader->info.base.num_inputs, + inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); @@@ -631,12 -662,9 +672,9 @@@ LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); - - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - generate_fs(lp, shader, key, + generate_fs(shader, key, builder, fs_type, context_ptr, @@@ -896,9 -956,9 +966,10 @@@ static void llvmpipe_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct lp_fragment_shader *shader; int nr_samplers; + int i; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@@ -918,46 -985,6 +996,46 @@@ shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); - for (i = 0; i < shader->info.num_inputs; i++) { - shader->inputs[i].usage_mask = shader->info.input_usage_mask[i]; ++ for (i = 0; i < shader->info.base.num_inputs; i++) { ++ shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; + - switch (shader->info.input_interpolate[i]) { ++ switch (shader->info.base.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + - switch (shader->info.input_semantic_name[i]) { ++ switch (shader->info.base.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + /* Colors may be either linearly or constant interpolated in + * the fragment shader, but that information isn't available + * here. Mark color inputs and fix them up later. + */ + shader->inputs[i].interp = LP_INTERP_COLOR; + break; + case TGSI_SEMANTIC_FACE: + shader->inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; + } + + shader->inputs[i].src_index = i+1; + } + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); diff --cc src/gallium/drivers/llvmpipe/lp_state_fs.h index f73c7801c00,ddad117acac..7d58c4936c7 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@@ -34,7 -34,7 +34,8 @@@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ + #include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ +#include "lp_bld_interp.h" /* for struct lp_shader_input */ struct tgsi_token; diff --cc src/gallium/drivers/llvmpipe/lp_state_setup.c index ee4991bf8d2,00000000000..a8dee280dd5 mode 100644,000000..100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@@ -1,760 -1,0 +1,760 @@@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_time.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include /* for LLVMVerifyFunction */ + +#include "lp_perf.h" +#include "lp_debug.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + + +/* currently organized to interpolate full float[4] attributes even + * when some elements are unused. Later, can pack vertex data more + * closely. + */ + + +struct lp_setup_args +{ + /* Function arguments: + */ + LLVMValueRef v0; + LLVMValueRef v1; + LLVMValueRef v2; + LLVMValueRef facing; /* boolean */ + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + + /* Derived: + */ + LLVMValueRef x0_center; + LLVMValueRef y0_center; + LLVMValueRef dy20_ooa; + LLVMValueRef dy01_ooa; + LLVMValueRef dx20_ooa; + LLVMValueRef dx01_ooa; +}; + +static LLVMTypeRef type4f(void) +{ + return LLVMVectorType(LLVMFloatType(), 4); +} + + +/* Equivalent of _mm_setr_ps(a,b,c,d) + */ +static LLVMValueRef vec4f(LLVMBuilderRef bld, + LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, + const char *name) +{ + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + LLVMValueRef res = LLVMGetUndef(type4f()); + + res = LLVMBuildInsertElement(bld, res, a, i0, ""); + res = LLVMBuildInsertElement(bld, res, b, i1, ""); + res = LLVMBuildInsertElement(bld, res, c, i2, ""); + res = LLVMBuildInsertElement(bld, res, d, i3, name); + + return res; +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(type4f()); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +static void +store_coef(LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); + + LLVMBuildStore(builder, + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + + LLVMBuildStore(builder, + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + + LLVMBuildStore(builder, + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); +} + + + +static void +emit_constant_coef4( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert, + unsigned attr) +{ + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0); + LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr"); + LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr"); + + store_coef(builder, args, slot, vert_attr, zerovec, zerovec); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void +emit_facing_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot ) +{ + LLVMValueRef a0_0 = args->facing; + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + + store_coef(builder, args, slot, a0, zerovec, zerovec); +} + + +static LLVMValueRef +vert_attrib(LLVMBuilderRef b, + LLVMValueRef vert, + int attr, + int elem, + const char *name) +{ + LLVMValueRef idx[2]; + idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); + idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); +} + + + +static void +emit_coef4( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMValueRef dy20_ooa = args->dy20_ooa; + LLVMValueRef dy01_ooa = args->dy01_ooa; + LLVMValueRef dx20_ooa = args->dx20_ooa; + LLVMValueRef dx01_ooa = args->dx01_ooa; + LLVMValueRef x0_center = args->x0_center; + LLVMValueRef y0_center = args->y0_center; + + /* XXX: using fsub, fmul on vector types -- does this work?? + */ + LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); + LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); + + /* Calculate dadx (vec4f) + */ + LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); + LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); + LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); + + /* Calculate dady (vec4f) + */ + LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); + LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); + LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); + + /* Calculate a0 - the attribute value at the origin + */ + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + + store_coef(b, args, slot, attr_0, dadx, dady); +} + + +static void +emit_linear_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + emit_coef4(b, args, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +emit_perspective_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); + LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); + LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); + + LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a"); + LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a"); + LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a"); + + emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); +} + + +static void +emit_position_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + int slot, int attrib ) +{ + emit_linear_coef(builder, args, slot, attrib); +} + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +static void +emit_tri_coef( LLVMBuilderRef builder, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args ) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + emit_position_coef(builder, args, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr); + } + else { + emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr); + } + break; + + case LP_INTERP_LINEAR: + emit_linear_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + emit_perspective_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + emit_facing_coef(builder, args, slot+1); + break; + + default: + assert(0); + } + } +} + + +/* XXX: This is generic code, share with fs/vs codegen: + */ +static lp_jit_setup_triangle +finalize_function(struct llvmpipe_screen *screen, + LLVMBuilderRef builder, + LLVMValueRef function) +{ + void *f; + + /* Verify the LLVM IR. If invalid, dump and abort */ +#ifdef DEBUG + if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + if (1) + lp_debug_dump_value(function); + abort(); + } +#endif + + /* Apply optimizations to LLVM IR */ + LLVMRunFunctionPassManager(screen->pass, function); + + if (gallivm_debug & GALLIVM_DEBUG_IR) + { + /* Print the LLVM IR to stderr */ + lp_debug_dump_value(function); + debug_printf("\n"); + } + + /* + * Translate the LLVM IR into machine code. + */ + f = LLVMGetPointerToGlobal(screen->engine, function); + + if (gallivm_debug & GALLIVM_DEBUG_ASM) + { + lp_disassemble(f); + } + + lp_func_delete_body(function); + + return f; +} + +/* XXX: Generic code: + */ +static void +lp_emit_emms(LLVMBuilderRef builder) +{ +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif +} + + +/* XXX: generic code: + */ +static void +set_noalias(LLVMBuilderRef builder, + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) +{ + int i; + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), + LLVMNoAliasAttribute); +} + +static void +init_args(LLVMBuilderRef b, + struct lp_setup_args *args, + const struct lp_setup_variant *variant) +{ + LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); + LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + + LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); + LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + + LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); + LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + + LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), + variant->key.pixel_center_half ? 0.5 : 0); + + LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); + LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); + + LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); + LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); + LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); + LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + + LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); + LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); + LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); + LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + + LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); + LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); + LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); + LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + + args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); + args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + + args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); + args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + + args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); + args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); +} + +/** + * Generate the runtime callable function for the coefficient calculation. + * + */ +static struct lp_setup_variant * +generate_setup_variant(struct llvmpipe_screen *screen, + struct lp_setup_variant_key *key) +{ + struct lp_setup_variant *variant = NULL; + struct lp_setup_args args; + char func_name[256]; + LLVMTypeRef vec4f_type; + LLVMTypeRef func_type; + LLVMTypeRef arg_types[8]; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + int64_t t0, t1; + + if (0) + goto fail; + + variant = CALLOC_STRUCT(lp_setup_variant); + if (variant == NULL) + goto fail; + + if (LP_DEBUG & DEBUG_COUNTERS) { + t0 = os_time_get(); + } + + memcpy(&variant->key, key, key->size); + variant->list_item_global.base = variant; + + util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u", + 0, + variant->no); + + /* Currently always deal with full 4-wide vertex attributes from + * the vertices. + */ + + vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + + arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ + arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ + arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ + arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ + arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ + arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key, unused */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(screen->module, func_name, func_type); + if (!variant->function) + goto fail; + + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + args.v0 = LLVMGetParam(variant->function, 0); + args.v1 = LLVMGetParam(variant->function, 1); + args.v2 = LLVMGetParam(variant->function, 2); + args.facing = LLVMGetParam(variant->function, 3); + args.a0 = LLVMGetParam(variant->function, 4); + args.dadx = LLVMGetParam(variant->function, 5); + args.dady = LLVMGetParam(variant->function, 6); + + lp_build_name(args.v0, "in_v0"); + lp_build_name(args.v1, "in_v1"); + lp_build_name(args.v2, "in_v2"); + lp_build_name(args.facing, "in_facing"); + lp_build_name(args.a0, "out_a0"); + lp_build_name(args.dadx, "out_dadx"); + lp_build_name(args.dady, "out_dady"); + + /* + * Function body + */ + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + init_args(builder, &args, variant); + emit_tri_coef(builder, &variant->key, &args); + + lp_emit_emms(builder); + LLVMBuildRetVoid(builder); + LLVMDisposeBuilder(builder); + + variant->jit_function = finalize_function(screen, builder, + variant->function); + if (!variant->jit_function) + goto fail; + + /* + * Update timing information: + */ + if (LP_DEBUG & DEBUG_COUNTERS) { + t1 = os_time_get(); + LP_COUNT_ADD(llvm_compile_time, t1 - t0); + LP_COUNT_ADD(nr_llvm_compiles, 1); + } + + return variant; + +fail: + if (variant) { + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + FREE(variant); + } + + return NULL; +} + + + +static void +lp_make_setup_variant_key(struct llvmpipe_context *lp, + struct lp_setup_variant_key *key) +{ + struct lp_fragment_shader *fs = lp->fs; + unsigned i; + + assert(sizeof key->inputs[0] == sizeof(ushort)); + - key->num_inputs = fs->info.num_inputs; ++ key->num_inputs = fs->info.base.num_inputs; + key->flatshade_first = lp->rasterizer->flatshade_first; + key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; + key->size = Offset(struct lp_setup_variant_key, + inputs[key->num_inputs]); + key->pad = 0; + + memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); + for (i = 0; i < key->num_inputs; i++) { + if (key->inputs[i].interp == LP_INTERP_COLOR) { + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_LINEAR; + } + } + +} + + +static void +remove_setup_variant(struct llvmpipe_context *lp, + struct lp_setup_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del setup_variant #%u total %u\n", + variant->no, lp->nr_setup_variants); + } + + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_global); + lp->nr_setup_variants--; + FREE(variant); +} + + + +/* When the number of setup variants exceeds a threshold, cull a + * fraction (currently a quarter) of them. + */ +static void +cull_setup_variants(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + int i; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { + struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list); + remove_setup_variant(lp, item->base); + } +} + + +/** + * Update fragment/vertex shader linkage state. This is called just + * prior to drawing something when some fragment-related state has + * changed. + */ +void +llvmpipe_update_setup(struct llvmpipe_context *lp) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + struct lp_setup_variant_key *key = &lp->setup_variant.key; + struct lp_setup_variant *variant = NULL; + struct lp_setup_variant_list_item *li; + + lp_make_setup_variant_key(lp, key); + + foreach(li, &lp->setup_variants_list) { + if(li->base->key.size == key->size && + memcmp(&li->base->key, key, key->size) == 0) { + variant = li->base; + break; + } + } + + if (variant) { + move_to_head(&lp->setup_variants_list, &variant->list_item_global); + } + else { + if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { + cull_setup_variants(lp); + } + + variant = generate_setup_variant(screen, key); + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } + + lp_setup_set_setup_variant(lp->setup, + variant); +} + +void +lp_delete_setup_variants(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_list_item *li; + li = first_elem(&lp->setup_variants_list); + while(!at_end(&lp->setup_variants_list, li)) { + struct lp_setup_variant_list_item *next = next_elem(li); + remove_setup_variant(lp, li->base); + li = next; + } +} + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) +{ + int i, slot; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = sa0 [0][i]; + float dadx = sdadx[0][i]; + float dady = sdady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned usage_mask = key->inputs[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } +}