From 636480cc9c7836daf879cb45644900922cf31f47 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 28 Sep 2007 15:39:09 -0600 Subject: [PATCH] Instead of linked program pairs, keep a list of vertex programs translated for each fragment program. --- src/mesa/state_tracker/st_atom_shader.c | 337 +++++++--------------- src/mesa/state_tracker/st_cb_clear.c | 4 +- src/mesa/state_tracker/st_cb_drawpixels.c | 4 +- src/mesa/state_tracker/st_cb_program.c | 12 + src/mesa/state_tracker/st_program.h | 30 +- 5 files changed, 149 insertions(+), 238 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 9bba78f7a12..d7a89d1e95e 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -51,105 +51,56 @@ #include "st_atom_shader.h" - /** - * Structure to describe a (vertex program, fragment program) pair - * which is linked together (used together to render something). This - * linkage basically servers the same purpose as the OpenGL Shading - * Language linker, but also applies to ARB programs and Mesa's - * fixed-function-generated programs. - * - * More background: - * - * The translation from Mesa programs to TGSI programs depends on the - * linkage between the vertex program and the fragment program. This is - * because we tightly pack the inputs and outputs of shaders into - * consecutive "slots". - * - * Suppose an app uses one vertex program "VP" (outputting pos, color and tex0) - * and two fragment programs: - * FP1: uses tex0 input only (input slot 0) - * FP2: uses color input only (input slot 0) - * - * When VP is used with FP1 we want VP.output[2] to match FP1.input[0], but - * when VP is used with FP2 we want VP.output[1] to match FP1.input[0]. - * - * We don't want to re-translate the vertex and/or fragment programs - * each time the VP/FP bindings/linkings change. The solution is this - * structure which stores the translated TGSI shaders on a per-linkage - * basis. - * + * This represents a vertex program, especially translated to match + * the inputs of a particular fragment shader. */ -struct linked_program_pair +struct translated_vertex_program { - struct st_vertex_program *vprog; /**< never changes */ - struct st_fragment_program *fprog; /**< never changes */ + /** The fragment shader "signature" this vertex shader is meant for: */ + GLbitfield frag_inputs; - struct tgsi_token vs_tokens[ST_FP_MAX_TOKENS]; - struct tgsi_token fs_tokens[ST_FP_MAX_TOKENS]; + /** Compared against master vertex program's serialNo: */ + GLuint serialNo; - const struct cso_vertex_shader *vs; - const struct cso_fragment_shader *fs; - - GLuint vertSerialNo, fragSerialNo; + /** Maps VERT_RESULT_x to slot */ + GLuint output_to_slot[VERT_RESULT_MAX]; - /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ - GLuint vp_input_to_index[MAX_VERTEX_PROGRAM_ATTRIBS]; - /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ - GLuint vp_index_to_input[MAX_VERTEX_PROGRAM_ATTRIBS]; + /** The program in TGSI format */ + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - GLuint vp_result_to_slot[VERT_RESULT_MAX]; + /** Pointer to the translated, cached vertex shader */ + const struct cso_vertex_shader *vs; - struct linked_program_pair *next; + struct translated_vertex_program *next; /**< next in linked list */ }; -/** XXX temporary - use some kind of hash table instead */ -static struct linked_program_pair *Pairs = NULL; - - -static void -find_and_remove(struct gl_program *prog) -{ - struct linked_program_pair *pair, *prev = NULL, *next; - for (pair = Pairs; pair; pair = next) { - next = pair->next; - if (pair->vprog == (struct st_vertex_program *) prog || - pair->fprog == (struct st_fragment_program *) prog) { - /* unlink */ - if (prev) - prev->next = next; - else - Pairs = next; - /* delete pair->vs */ - /* delete pair->fs */ - free(pair); - } - else { - prev = pair; - } - } -} - /** - * Delete any known program pairs that use the given vertex program. + * Free data hanging off the st vert prog. */ void st_remove_vertex_program(struct st_context *st, struct st_vertex_program *stvp) { - find_and_remove(&stvp->Base.Base); + /* no-op, for now? */ } /** - * Delete any known program pairs that use the given fragment program. + * Free data hanging off the st frag prog. */ void st_remove_fragment_program(struct st_context *st, struct st_fragment_program *stfp) { - find_and_remove(&stfp->Base.Base); + struct translated_vertex_program *xvp, *next; + + for (xvp = stfp->vertex_programs; xvp; xvp = next) { + next = xvp->next; + /* XXX free xvp->vs */ + free(xvp); + } } @@ -187,182 +138,122 @@ vp_out_to_fp_in(GLuint vertResult) /** - * Examine the outputs written by a vertex program and the inputs read - * by a fragment program to determine which match up and where they - * should be mapped into the generic shader output/input slots. - * \param vert_output_map returns the vertex output register mapping - * \param frag_input_map returns the fragment input register mapping + * Find a translated vertex program that corresponds to stvp and + * has outputs matched to stfp's inputs. + * This performs vertex and fragment translation (to TGSI) when needed. */ -static GLuint -link_outputs_to_inputs(GLbitfield outputsWritten, - GLbitfield inputsRead, - GLuint vert_output_map[], - GLuint frag_input_map[]) +static struct translated_vertex_program * +find_translated_vp(struct st_context *st, + struct st_vertex_program *stvp, + struct st_fragment_program *stfp) { static const GLuint UNUSED = ~0; - GLint vert_slot_to_attr[50], frag_slot_to_attr[50]; - GLuint outAttr, inAttr; - GLuint numIn = 0, dummySlot; - - for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { - if (inputsRead & (1 << inAttr)) { - frag_input_map[inAttr] = numIn; - frag_slot_to_attr[numIn] = inAttr; - numIn++; - } - else { - frag_input_map[inAttr] = UNUSED; - } - } + struct translated_vertex_program *xvp; + const GLbitfield fragInputsRead + = stfp->Base.Base.InputsRead | FRAG_BIT_WPOS; - for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { - if (outputsWritten & (1 << outAttr)) { - /* see if the frag prog wants this vert output */ - GLint fpIn = vp_out_to_fp_in(outAttr); + /* + * Translate fragment program if needed. + */ + if (!stfp->fs) { + GLuint inAttr, numIn = 0; - if (fpIn >= 0) { - GLuint frag_slot = frag_input_map[fpIn]; - vert_output_map[outAttr] = frag_slot; - vert_slot_to_attr[frag_slot] = outAttr; + for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { + if (fragInputsRead & (1 << inAttr)) { + stfp->input_to_slot[inAttr] = numIn; + numIn++; } else { - vert_output_map[outAttr] = UNUSED; + stfp->input_to_slot[inAttr] = UNUSED; } } - else { - vert_output_map[outAttr] = UNUSED; - } - } - /* - * We'll map all unused vertex program outputs to this slot. - * We'll also map all undefined fragment program inputs to this slot. - */ - dummySlot = numIn; + stfp->num_input_slots = numIn; - /* Map vert program outputs that aren't used to the dummy slot */ - for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { - if (outputsWritten & (1 << outAttr)) { - if (vert_output_map[outAttr] == UNUSED) - vert_output_map[outAttr] = dummySlot; - } + (void) st_translate_fragment_program(st, stfp, + stfp->input_to_slot, + stfp->tokens, + ST_MAX_SHADER_TOKENS); + assert(stfp->fs); } - /* Map frag program inputs that aren't defined to the dummy slot */ - for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { - if (inputsRead & (1 << inAttr)) { - if (frag_input_map[inAttr] == UNUSED) - frag_input_map[inAttr] = dummySlot; - } - } -#if 0 - printf("vOut W slot\n"); - for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { - printf("%4d %c %4d\n", outAttr, - " *"[(outputsWritten >> outAttr) & 1], - vert_output_map[outAttr]); - } - printf("vIn R slot\n"); - for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { - printf("%3d %c %4d\n", inAttr, - " *"[(inputsRead >> inAttr) & 1], - frag_input_map[inAttr]); + /* See if we've got a translated vertex program whose outputs match + * the fragment program's inputs. + * XXX This could be a hash lookup, using InputsRead as the key. + */ + for (xvp = stfp->vertex_programs; xvp; xvp = xvp->next) { + if (xvp->frag_inputs == stfp->Base.Base.InputsRead) { + break; + } } -#endif - return numIn; -} + /* No? Allocate translated vp object now */ + if (!xvp) { + xvp = CALLOC_STRUCT(translated_vertex_program); + xvp->frag_inputs = fragInputsRead; + xvp->next = stfp->vertex_programs; + stfp->vertex_programs = xvp; + } -static struct linked_program_pair * -lookup_program_pair(struct st_context *st, - struct st_vertex_program *vprog, - struct st_fragment_program *fprog) -{ - struct linked_program_pair *pair; - - /* search */ - for (pair = Pairs; pair; pair = pair->next) { - if (pair->vprog == vprog && pair->fprog == fprog) { - /* found it */ - break; + /* See if we need to translate vertex program to TGSI form */ + if (xvp->serialNo != stvp->serialNo) { + GLuint outAttr, dummySlot; + const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten; + + /* Compute mapping of vertex program outputs to slots, which depends + * on the fragment program's input->slot mapping. + */ + for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { + /* set default: */ + xvp->output_to_slot[outAttr] = UNUSED; + + if (outputsWritten & (1 << outAttr)) { + /* see if the frag prog wants this vert output */ + GLint fpIn = vp_out_to_fp_in(outAttr); + if (fpIn >= 0) { + xvp->output_to_slot[outAttr] = stfp->input_to_slot[fpIn]; + } + } } - } - /* - * Examine the outputs of the vertex shader and the inputs of the - * fragment shader to determine how to match both to a common set - * of slots. - */ - if (!pair) { - pair = CALLOC_STRUCT(linked_program_pair); - if (pair) { - pair->vprog = vprog; - pair->fprog = fprog; + /* Unneeded vertex program outputs will go to this slot. + * We could use this info to do dead code elimination in the + * vertex program. + */ + dummySlot = stfp->num_input_slots; + + /* Map vert program outputs that aren't used to the dummy slot */ + for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { + if (outputsWritten & (1 << outAttr)) { + if (xvp->output_to_slot[outAttr] == UNUSED) + xvp->output_to_slot[outAttr] = dummySlot; + } } - } - return pair; -} + xvp->vs = st_translate_vertex_program(st, stvp, + xvp->output_to_slot, + xvp->tokens, + ST_MAX_SHADER_TOKENS); + assert(xvp->vs); + stvp->vs = NULL; /* don't want to use this */ -static void -link_shaders(struct st_context *st, struct linked_program_pair *pair) -{ - struct st_vertex_program *vprog = pair->vprog; - struct st_fragment_program *fprog = pair->fprog; - - assert(vprog); - assert(fprog); - - if (pair->vertSerialNo != vprog->serialNo || - pair->fragSerialNo != fprog->serialNo) { - /* re-link and re-translate */ - GLuint vert_output_mapping[VERT_RESULT_MAX]; - GLuint frag_input_mapping[FRAG_ATTRIB_MAX]; - - link_outputs_to_inputs(vprog->Base.Base.OutputsWritten, - fprog->Base.Base.InputsRead | FRAG_BIT_WPOS, - vert_output_mapping, - frag_input_mapping); - - /* xlate vp to vs + vs tokens */ - st_translate_vertex_program(st, vprog, - vert_output_mapping, - pair->vs_tokens, ST_FP_MAX_TOKENS); - - pair->vprog = vprog; - /* temp hacks */ - pair->vs = vprog->vs; - vprog->vs = NULL; - - - /* xlate fp to fs + fs tokens */ - st_translate_fragment_program(st, fprog, - frag_input_mapping, - pair->fs_tokens, ST_FP_MAX_TOKENS); - pair->fprog = fprog; - /* temp hacks */ - pair->fs = fprog->fs; - fprog->fs = NULL; - - /* save pair */ - pair->next = Pairs; - Pairs = pair; - - pair->vertSerialNo = vprog->serialNo; - pair->fragSerialNo = fprog->serialNo; + /* translated VP is up to date now */ + xvp->serialNo = stvp->serialNo; } + + return xvp; } static void update_linkage( struct st_context *st ) { - struct linked_program_pair *pair; struct st_vertex_program *stvp; struct st_fragment_program *stfp; + struct translated_vertex_program *xvp; /* find active shader and params -- Should be covered by * ST_NEW_VERTEX_PROGRAM @@ -392,23 +283,17 @@ update_linkage( struct st_context *st ) stfp = st_fragment_program(st->ctx->FragmentProgram._Current); } + xvp = find_translated_vp(st, stvp, stfp); - pair = lookup_program_pair(st, stvp, stfp); - assert(pair); - link_shaders(st, pair); - - - /* Bind the vertex program and TGSI shader */ st->vp = stvp; - st->state.vs = pair->vs; + st->state.vs = xvp->vs; st->pipe->bind_vs_state(st->pipe, st->state.vs->data); - /* Bind the fragment program and TGSI shader */ st->fp = stfp; - st->state.fs = pair->fs; + st->state.fs = stfp->fs; st->pipe->bind_fs_state(st->pipe, st->state.fs->data); - st->vertex_result_to_slot = pair->vp_result_to_slot; + st->vertex_result_to_slot = xvp->output_to_slot; } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 3a70d2f121e..cca4fa19a90 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -156,7 +156,7 @@ make_frag_shader(struct st_context *st) stfp = (struct st_fragment_program *) p; st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_FP_MAX_TOKENS); + stfp->tokens, ST_MAX_SHADER_TOKENS); return stfp; } @@ -205,7 +205,7 @@ make_vertex_shader(struct st_context *st) stvp = (struct st_vertex_program *) p; st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_FP_MAX_TOKENS); + stvp->tokens, ST_MAX_SHADER_TOKENS); assert(stvp->vs); return stvp; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index fb89ceef8c2..f58b5d947db 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -131,7 +131,7 @@ make_fragment_shader(struct st_context *st, GLboolean bitmapMode) stfp = (struct st_fragment_program *) p; st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_FP_MAX_TOKENS); + stfp->tokens, ST_MAX_SHADER_TOKENS); return stfp; } @@ -203,7 +203,7 @@ make_vertex_shader(struct st_context *st, GLboolean passColor) stvp = (struct st_vertex_program *) p; st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_FP_MAX_TOKENS); + stvp->tokens, ST_MAX_SHADER_TOKENS); return stvp; } diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 3a7ce9405e8..5c00dd1ae1c 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -165,6 +165,12 @@ static void st_program_string_notify( GLcontext *ctx, stfp->serialNo++; + if (stfp->fs) { + /* free the TGSI code */ + // cso_delete(stfp->vs); + stfp->fs = NULL; + } + stfp->param_state = stfp->Base.Base.Parameters->StateFlags; } else if (target == GL_VERTEX_PROGRAM_ARB) { @@ -172,6 +178,12 @@ static void st_program_string_notify( GLcontext *ctx, stvp->serialNo++; + if (stvp->vs) { + /* free the TGSI code */ + // cso_delete(stfp->vs); + stvp->vs = NULL; + } + stvp->param_state = stvp->Base.Base.Parameters->StateFlags; /* Also tell tnl about it: diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 2b792013131..a714f3f5b01 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,20 +38,28 @@ #include "pipe/tgsi/exec/tgsi_token.h" #include "x86/rtasm/x86sse.h" -#define ST_FP_MAX_TOKENS 1024 + +#define ST_MAX_SHADER_TOKENS 1024 + struct cso_fragment_shader; struct cso_vertex_shader; +struct translated_vertex_program; + +/** + * Derived from Mesa gl_fragment_program: + */ struct st_fragment_program { struct gl_fragment_program Base; - GLboolean error; /* If program is malformed for any reason. */ - GLuint serialNo; + GLuint input_to_slot[FRAG_ATTRIB_MAX]; /**< Maps FRAG_ATTRIB_x to slot */ + GLuint num_input_slots; + /** The program in TGSI format */ - struct tgsi_token tokens[ST_FP_MAX_TOKENS]; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; #if defined(__i386__) || defined(__386__) struct x86_function sse2_program; @@ -61,23 +69,29 @@ struct st_fragment_program const struct cso_fragment_shader *fs; GLuint param_state; + + /** List of vertex programs which have been translated such that their + * outputs match this fragment program's inputs. + */ + struct translated_vertex_program *vertex_programs; }; +/** + * Derived from Mesa gl_fragment_program: + */ struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ - GLboolean error; /**< Set if program is malformed for any reason. */ - GLuint serialNo; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[MAX_VERTEX_PROGRAM_ATTRIBS]; /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ - GLuint index_to_input[MAX_VERTEX_PROGRAM_ATTRIBS]; + GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; /** The program in TGSI format */ - struct tgsi_token tokens[ST_FP_MAX_TOKENS]; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; /** Pointer to the corresponding cached shader */ const struct cso_vertex_shader *vs; -- 2.30.2