From: Eric Anholt Date: Tue, 27 Jul 2010 00:47:59 +0000 (-0700) Subject: Merge remote branch 'origin/master' into glsl2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=afe125e0a18ac3886c45c7e6b02b122fb2d327b5;p=mesa.git Merge remote branch 'origin/master' into glsl2 This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h --- afe125e0a18ac3886c45c7e6b02b122fb2d327b5 diff --cc src/glsl/Makefile index f4b0fb55a78,ca7f2d2ac7d..462d49e8840 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@@ -4,158 -4,12 +4,158 @@@ TOP = ../. include $(TOP)/configs/current -SUBDIRS = pp cl apps +LIBNAME = glsl -default install clean: - @for dir in $(SUBDIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) $@) || exit 1; \ - fi \ - done +LIBGLCPP_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/pp.c \ + glcpp/xtalloc.c +GLCPP_SOURCES = \ + $(LIBGLCPP_SOURCES) \ + glcpp/glcpp.c + +C_SOURCES = \ + $(LIBGLCPP_SOURCES) + +CXX_SOURCES = \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + builtin_function.cpp \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser_extras.cpp \ + glsl_types.cpp \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir_constant_folding.cpp \ + ir_constant_variable.cpp \ + ir_copy_propagation.cpp \ + ir.cpp \ + ir_dead_code.cpp \ + ir_dead_code_local.cpp \ + ir_div_to_mul_rcp.cpp \ + ir_expression_flattening.cpp \ + ir_function_can_inline.cpp \ + ir_function.cpp \ + ir_function_inlining.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hv_accept.cpp \ + ir_if_return.cpp \ + ir_if_simplification.cpp \ + ir_if_to_cond_assign.cpp \ + ir_import_prototypes.cpp \ + ir_mat_op_to_vec.cpp \ + ir_mod_to_fract.cpp \ + ir_print_visitor.cpp \ + ir_reader.cpp \ + 
ir_swizzle_swizzle.cpp \ + ir_validate.cpp \ + ir_variable.cpp \ + ir_vec_index_to_cond_assign.cpp \ + ir_vec_index_to_swizzle.cpp \ + linker.cpp \ + link_functions.cpp \ + s_expression.cpp + +LIBS = \ + $(TOP)/src/glsl/libglsl.a \ + $(shell pkg-config --libs talloc) + +APPS = glsl_compiler glcpp/glcpp + +GLSL2_C_SOURCES = \ - ../mesa/shader/hash_table.c \ - ../mesa/shader/symbol_table.c ++ ../mesa/program/hash_table.c \ ++ ../mesa/program/symbol_table.c +GLSL2_CXX_SOURCES = \ + main.cpp + +GLSL2_OBJECTS = \ + $(GLSL2_C_SOURCES:.c=.o) \ + $(GLSL2_CXX_SOURCES:.cpp=.o) + +### Basic defines ### + +DEFINES = \ + $(LIBRARY_DEFINES) \ + $(API_DEFINES) + +GLCPP_OBJECTS = \ + $(GLCPP_SOURCES:.c=.o) \ - ../mesa/shader/hash_table.o ++ ../mesa/program/hash_table.o + +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(CXX_SOURCES:.cpp=.o) + +INCLUDES = \ + -I. \ + -I../mesa \ + -I../mapi \ - -I../mesa/shader \ ++ -I../mesa/program \ + -I../../include \ + $(LIBRARY_INCLUDES) + +ALL_SOURCES = \ + $(C_SOURCES) \ + $(CXX_SOURCES) \ + $(GLSL2_CXX_SOURCES) \ + $(GLSL2_C_SOURCES) + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a $(APPS) + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/src/glsl/Makefile.template + $(MKLIB) -cplusplus -o $(LIBNAME) -static $(OBJECTS) + +depend: $(ALL_SOURCES) Makefile + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + -rm -f $(APPS) + +# Dummy target +install: + @echo -n "" + + +##### RULES ##### + +glsl_compiler: $(GLSL2_OBJECTS) libglsl.a + $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) $(LIBS) -o $@ + +glcpp/glcpp: $(GLCPP_OBJECTS) libglsl.a + $(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) $(LIBS) -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@ + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +glsl_lexer.cpp: glsl_lexer.lpp + flex --never-interactive 
--outfile="$@" $< + +glsl_parser.cpp: glsl_parser.ypp + bison -v -o "$@" --defines=glsl_parser.h $< + +glcpp/glcpp-lex.c: glcpp/glcpp-lex.l + flex --never-interactive --outfile="$@" $< + +glcpp/glcpp-parse.c: glcpp/glcpp-parse.y + bison -v -o "$@" --defines=glcpp/glcpp-parse.h $< + +builtin_function.cpp: builtins/*/* + ./builtins/tools/generate_builtins.pl > builtin_function.cpp + +-include depend diff --cc src/glsl/glsl_symbol_table.h index 8fbc66c974d,00000000000..27e825597c5 mode 100644,000000..100644 --- a/src/glsl/glsl_symbol_table.h +++ b/src/glsl/glsl_symbol_table.h @@@ -1,165 -1,0 +1,165 @@@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef GLSL_SYMBOL_TABLE +#define GLSL_SYMBOL_TABLE + +#include + +extern "C" { - #include "symbol_table.h" ++#include "program/symbol_table.h" +} +#include "ir.h" +#include "glsl_types.h" + +/** + * Facade class for _mesa_symbol_table + * + * Wraps the existing \c _mesa_symbol_table data structure to enforce some + * type safety and some symbol table invariants. + */ +class glsl_symbol_table { +private: + enum glsl_symbol_name_space { /* passed as the name space argument of the _mesa_symbol_table calls below */ + glsl_variable_name_space = 0, + glsl_type_name_space = 1, + glsl_function_name_space = 2 + }; + + static int + _glsl_symbol_table_destructor (glsl_symbol_table *table) + { + table->~glsl_symbol_table(); + + return 0; + } + +public: + /* Callers of this talloc-based new need not call delete. It's + * easier to just talloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *table; + + table = talloc_size(ctx, size); + assert(table != NULL); + + talloc_set_destructor(table, (int (*)(void*)) _glsl_symbol_table_destructor); + + return table; + } + + /* If the user *does* call delete, that's OK, we will just + * talloc_free in that case. Here, C++ will have already called the + * destructor so tell talloc not to do that again. */ + static void operator delete(void *table) + { + talloc_set_destructor(table, NULL); + talloc_free(table); + } + + glsl_symbol_table() + { + table = _mesa_symbol_table_ctor(); + } + + ~glsl_symbol_table() + { + _mesa_symbol_table_dtor(table); + } + + void push_scope() + { + _mesa_symbol_table_push_scope(table); + } + + void pop_scope() + { + _mesa_symbol_table_pop_scope(table); + } + + /** + * Determine whether a name was declared at the current scope + */ + bool name_declared_this_scope(const char *name) + { + return _mesa_symbol_table_symbol_scope(table, -1, name) == 0; + } + + /** + * \name Methods to add symbols to the table + * + * There is some temptation to rename all these functions to \c add_symbol + * or similar. 
However, this breaks symmetry with the getter functions and + * reduces the clarity of the intention of code that uses these methods. Each method returns true exactly when \c _mesa_symbol_table_add_symbol returns 0. + */ + /*@{*/ + bool add_variable(const char *name, ir_variable *v) + { + return _mesa_symbol_table_add_symbol(table, glsl_variable_name_space, + name, v) == 0; + } + + bool add_type(const char *name, const glsl_type *t) + { + return _mesa_symbol_table_add_symbol(table, glsl_type_name_space, + name, (void *) t) == 0; + } + + bool add_function(const char *name, ir_function *f) + { + return _mesa_symbol_table_add_symbol(table, glsl_function_name_space, + name, f) == 0; + } + /*@}*/ + + /** + * \name Methods to get symbols from the table. Each getter returns the result of \c _mesa_symbol_table_find_symbol for the matching name space, cast to the requested pointer type. + */ + /*@{*/ + ir_variable *get_variable(const char *name) + { + return (ir_variable *) + _mesa_symbol_table_find_symbol(table, glsl_variable_name_space, name); + } + + glsl_type *get_type(const char *name) + { + return (glsl_type *) + _mesa_symbol_table_find_symbol(table, glsl_type_name_space, name); + } + + ir_function *get_function(const char *name) + { + return (ir_function *) + _mesa_symbol_table_find_symbol(table, glsl_function_name_space, name); + } + /*@}*/ + +private: + struct _mesa_symbol_table *table; /**< Wrapped table; created in the constructor, released in the destructor. */ +}; + +#endif /* GLSL_SYMBOL_TABLE */ diff --cc src/glsl/linker.cpp index 7c30a40a6ce,00000000000..ea0274eac33 mode 100644,000000..100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@@ -1,1305 -1,0 +1,1305 @@@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next 
+ * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file linker.cpp + * GLSL linker implementation + * + * Given a set of shaders that are to be linked to generate a final program, + * there are three distinct stages. + * + * In the first stage shaders are partitioned into groups based on the shader + * type. All shaders of a particular type (e.g., vertex shaders) are linked + * together. + * + * - Undefined references in each shader are resolved to definitions in + * another shader. + * - Types and qualifiers of uniforms, outputs, and global variables defined + * in multiple shaders with the same name are verified to be the same. + * - Initializers for uniforms and global variables defined + * in multiple shaders with the same name are verified to be the same. + * + * The result, in the terminology of the GLSL spec, is a set of shader + * executables for each processing unit. + * + * After the first stage is complete, a series of semantic checks are performed + * on each of the shader executables. + * + * - Each shader executable must define a \c main function. + * - Each vertex shader executable must write to \c gl_Position. + * - Each fragment shader executable must write to either \c gl_FragData or + * \c gl_FragColor. + * + * In the final stage individual shader executables are linked to create a + * complete executable. 
+ * + * - Types of uniforms defined in multiple shader stages with the same name + * are verified to be the same. + * - Initializers for uniforms defined in multiple shader stages with the + * same name are verified to be the same. + * - Types and qualifiers of outputs defined in one stage are verified to + * be the same as the types and qualifiers of inputs defined with the same + * name in a later stage. + * + * \author Ian Romanick + */ +#include +#include +#include +#include + +extern "C" { +#include +} + +#include "main/mtypes.h" +#include "main/macros.h" ++#include "main/shaderobj.h" +#include "glsl_symbol_table.h" +#include "ir.h" +#include "program.h" +#include "hash_table.h" - #include "shader_api.h" +#include "linker.h" +#include "ir_optimization.h" + +/** + * Visitor that determines whether or not a variable is ever written. + */ +class find_assignment_visitor : public ir_hierarchical_visitor { +public: + find_assignment_visitor(const char *name) + : name(name), found(false) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_assignment *ir) + { + ir_variable *const var = ir->lhs->variable_referenced(); + + if (strcmp(name, var->name) == 0) { + found = true; + return visit_stop; + } + + return visit_continue_with_parent; + } + + bool variable_found() + { + return found; + } + +private: + const char *name; /**< Find writes to a variable with this name. */ + bool found; /**< Was a write to the variable found? */ +}; + + +void +linker_error_printf(gl_shader_program *prog, const char *fmt, ...) 
+{ + va_list ap; + + prog->InfoLog = talloc_strdup_append(prog->InfoLog, "error: "); + va_start(ap, fmt); + prog->InfoLog = talloc_vasprintf_append(prog->InfoLog, fmt, ap); + va_end(ap); +} + + +void +invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode, + int generic_base) +{ + foreach_list(node, sh->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != (unsigned) mode)) + continue; + + /* Only assign locations for generic attributes / varyings / etc. + */ + if (var->location >= generic_base) + var->location = -1; + } +} + + +/** + * Determine the number of attribute slots required for a particular type + * + * This code is here because it implements the language rules of a specific + * GLSL version. Since it's a property of the language and not a property of + * types in general, it doesn't really belong in glsl_type. + */ +unsigned +count_attribute_slots(const glsl_type *t) +{ + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "A scalar input counts the same amount against this limit as a vec4, + * so applications may want to consider packing groups of four + * unrelated float inputs together into a vector to better utilize the + * capabilities of the underlying hardware. A matrix input will use up + * multiple locations. The number of locations used will equal the + * number of columns in the matrix." + * + * The spec does not explicitly say how arrays are counted. However, it + * should be safe to assume the total number of slots consumed by an array + * is the number of entries in the array multiplied by the number of slots + * consumed by a single element of the array. 
+ */ + + if (t->is_array()) + return t->array_size() * count_attribute_slots(t->element_type()); + + if (t->is_matrix()) + return t->matrix_columns; + + return 1; +} + + +/** + * Verify that a vertex shader executable meets all semantic requirements + * + * \param shader Vertex shader executable to be verified + */ +bool +validate_vertex_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return true; + + find_assignment_visitor find("gl_Position"); + find.run(shader->ir); + if (!find.variable_found()) { + linker_error_printf(prog, + "vertex shader does not write to `gl_Position'\n"); + return false; + } + + return true; +} + + +/** + * Verify that a fragment shader executable meets all semantic requirements + * + * \param shader Fragment shader executable to be verified + */ +bool +validate_fragment_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return true; + + find_assignment_visitor frag_color("gl_FragColor"); + find_assignment_visitor frag_data("gl_FragData"); + + frag_color.run(shader->ir); + frag_data.run(shader->ir); + + if (frag_color.variable_found() && frag_data.variable_found()) { + linker_error_printf(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); + return false; + } + + return true; +} + + +/** + * Generate a string describing the mode of a variable + */ +static const char * +mode_string(const ir_variable *var) +{ + switch (var->mode) { + case ir_var_auto: + return (var->read_only) ? 
"global constant" : "global variable"; + + case ir_var_uniform: return "uniform"; + case ir_var_in: return "shader input"; + case ir_var_out: return "shader output"; + case ir_var_inout: return "shader inout"; + + case ir_var_temporary: + default: + assert(!"Should not get here."); + return "invalid variable"; + } +} + + +/** + * Perform validation of global variables used across multiple shaders + */ +bool +cross_validate_globals(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + bool uniforms_only) +{ + /* Examine all of the uniforms in all of the shaders and cross validate + * them. + */ + glsl_symbol_table variables; + for (unsigned i = 0; i < num_shaders; i++) { + foreach_list(node, shader_list[i]->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if (var == NULL) + continue; + + if (uniforms_only && (var->mode != ir_var_uniform)) + continue; + + /* Don't cross validate temporaries that are at global scope. These + * will eventually get pulled into the shaders 'main'. + */ + if (var->mode == ir_var_temporary) + continue; + + /* If a global with this name has already been seen, verify that the + * new instance has the same type. In addition, if the globals have + * initializers, the values of the initializers must be the same. + */ + ir_variable *const existing = variables.get_variable(var->name); + if (existing != NULL) { + if (var->type != existing->type) { + linker_error_printf(prog, "%s `%s' declared as type " + "`%s' and type `%s'\n", + mode_string(var), + var->name, var->type->name, + existing->type->name); + return false; + } + + /* FINISHME: Handle non-constant initializers. 
+ */ + if (var->constant_value != NULL) { + if (existing->constant_value != NULL) { + if (!var->constant_value->has_value(existing->constant_value)) { + linker_error_printf(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return false; + } + } else + /* If the first-seen instance of a particular uniform did not + * have an initializer but a later instance does, copy the + * initializer to the version stored in the symbol table. + */ + /* FINISHME: This is wrong. The constant_value field should + * FINISHME: not be modified! Imagine a case where a shader + * FINISHME: without an initializer is linked in two different + * FINISHME: programs with shaders that have differing + * FINISHME: initializers. Linking with the first will + * FINISHME: modify the shader, and linking with the second + * FINISHME: will fail. + */ + existing->constant_value = var->constant_value->clone(NULL); + } + } else + variables.add_variable(var->name, var); + } + } + + return true; +} + + +/** + * Perform validation of uniforms used across multiple shader stages + */ +bool +cross_validate_uniforms(struct gl_shader_program *prog) +{ + return cross_validate_globals(prog, prog->_LinkedShaders, + prog->_NumLinkedShaders, true); +} + + +/** + * Validate that outputs from one stage match inputs of another + */ +bool +cross_validate_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer) +{ + glsl_symbol_table parameters; + /* FINISHME: Figure these out dynamically. */ + const char *const producer_stage = "vertex"; + const char *const consumer_stage = "fragment"; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_list(node, producer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + /* FINISHME: For geometry shaders, this should also look for inout + * FINISHME: variables. 
+ */ + if ((var == NULL) || (var->mode != ir_var_out)) + continue; + + parameters.add_variable(var->name, var); + } + + + /* Find all shader inputs in the "consumer" stage. Any variables that have + * matching outputs already in the symbol table must have the same type and + * qualifiers. + */ + foreach_list(node, consumer->ir) { + ir_variable *const input = ((ir_instruction *) node)->as_variable(); + + /* FINISHME: For geometry shaders, this should also look for inout + * FINISHME: variables. + */ + if ((input == NULL) || (input->mode != ir_var_in)) + continue; + + ir_variable *const output = parameters.get_variable(input->name); + if (output != NULL) { + /* Check that the types match between stages. + */ + if (input->type != output->type) { + linker_error_printf(prog, + "%s shader output `%s' delcared as " + "type `%s', but %s shader input declared " + "as type `%s'\n", + producer_stage, output->name, + output->type->name, + consumer_stage, input->type->name); + return false; + } + + /* Check that all of the qualifiers match between stages. + */ + if (input->centroid != output->centroid) { + linker_error_printf(prog, + "%s shader output `%s' %s centroid qualifier, " + "but %s shader input %s centroid qualifier\n", + producer_stage, + output->name, + (output->centroid) ? "has" : "lacks", + consumer_stage, + (input->centroid) ? "has" : "lacks"); + return false; + } + + if (input->invariant != output->invariant) { + linker_error_printf(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + producer_stage, + output->name, + (output->invariant) ? "has" : "lacks", + consumer_stage, + (input->invariant) ? 
"has" : "lacks"); + return false; + } + + if (input->interpolation != output->interpolation) { + linker_error_printf(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + producer_stage, + output->name, + output->interpolation_string(), + consumer_stage, + input->interpolation_string()); + return false; + } + } + } + + return true; +} + + +/** + * Populates a shaders symbol table with all global declarations + */ +static void +populate_symbol_table(gl_shader *sh) +{ + sh->symbols = new(sh) glsl_symbol_table; + + foreach_list(node, sh->ir) { + ir_instruction *const inst = (ir_instruction *) node; + ir_variable *var; + ir_function *func; + + if ((func = inst->as_function()) != NULL) { + sh->symbols->add_function(func->name, func); + } else if ((var = inst->as_variable()) != NULL) { + sh->symbols->add_variable(var->name, var); + } + } +} + + +/** + * Remap variables referenced in an instruction tree + * + * This is used when instruction trees are cloned from one shader and placed in + * another. These trees will contain references to \c ir_variable nodes that + * do not exist in the target shader. This function finds these \c ir_variable + * references and replaces the references with matching variables in the target + * shader. + * + * If there is no matching variable in the target shader, a clone of the + * \c ir_variable is made and added to the target shader. The new variable is + * added to \b both the instruction stream and the symbol table. + * + * \param inst IR tree that is to be processed. + * \param symbols Symbol table containing global scope symbols in the + * linked shader. + * \param instructions Instruction stream where new variable declarations + * should be added. 
+ */ +void +remap_variables(ir_instruction *inst, glsl_symbol_table *symbols, + exec_list *instructions, hash_table *temps) +{ + class remap_visitor : public ir_hierarchical_visitor { + public: + remap_visitor(glsl_symbol_table *symbols, exec_list *instructions, + hash_table *temps) + { + this->symbols = symbols; + this->instructions = instructions; + this->temps = temps; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->mode == ir_var_temporary) { /* temporaries are resolved through the temps table, keyed by the variable currently referenced, and must already be present */ + ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var); + + assert(var != NULL); + ir->var = var; + return visit_continue; + } + + ir_variable *const existing = + this->symbols->get_variable(ir->var->name); /* all other variables are matched by name */ + if (existing != NULL) + ir->var = existing; + else { /* no variable of that name in the symbol table: clone it, register the clone, and put its declaration at the head of the instruction stream */ + ir_variable *copy = ir->var->clone(NULL); + + this->symbols->add_variable(copy->name, copy); + this->instructions->push_head(copy); + ir->var = copy; + } + + return visit_continue; + } + + private: + glsl_symbol_table *symbols; + exec_list *instructions; + hash_table *temps; + }; + + remap_visitor v(symbols, instructions, temps); + + inst->accept(&v); +} + + +/** + * Move non-declarations from one instruction stream to another + * + * The intended usage pattern of this function is to pass the pointer to the + * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node + * pointer) for \c last and \c false for \c make_copies on the first + * call. Successive calls pass the return value of the previous call for + * \c last and \c true for \c make_copies. When copying, cloned non-variable instructions are remapped against the symbols and IR of \c target. + * + * \param instructions Source instruction stream + * \param last Instruction after which new instructions should be + * inserted in the target instruction stream + * \param make_copies Flag selecting whether instructions in \c instructions + * should be copied (via \c ir_instruction::clone) into the + * target list or moved. + * + * \return + * The new "last" instruction in the target instruction stream. 
This pointer + * is suitable for use as the \c last parameter of a later call to this + * function. + */ +exec_node * +move_non_declarations(exec_list *instructions, exec_node *last, + bool make_copies, gl_shader *target) +{ + hash_table *temps = NULL; + + if (make_copies) /* the temporaries table is only needed (and only created) when cloning */ + temps = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + + foreach_list_safe(node, instructions) { + ir_instruction *inst = (ir_instruction *) node; + + if (inst->as_function()) + continue; + + ir_variable *var = inst->as_variable(); + if ((var != NULL) && (var->mode != ir_var_temporary)) + continue; + + assert(inst->as_assignment() + || ((var != NULL) && (var->mode == ir_var_temporary))); + + if (make_copies) { + inst = inst->clone(NULL); + + if (var != NULL) + hash_table_insert(temps, inst, var); /* record the clone under the original temporary so remap_variables can find it */ + else + remap_variables(inst, target->symbols, target->ir, temps); + } else { + inst->remove(); + } + + last->insert_after(inst); + last = inst; + } + + if (make_copies) + hash_table_dtor(temps); + + return last; +} + +/** + * Get the function signature for main from a shader. Returns NULL when the shader has no function named main, or when main has no defined signature matching an empty parameter list. + */ +static ir_function_signature * +get_main_function_signature(gl_shader *sh) +{ + ir_function *const f = sh->symbols->get_function("main"); + if (f != NULL) { + exec_list void_parameters; + + /* Look for the 'void main()' signature and ensure that it's defined. + * This keeps the linker from accidentally picking a shader that just + * contains a prototype for main. + * + * We don't have to check for multiple definitions of main (in multiple + * shaders) because that would have already been caught above. + */ + ir_function_signature *sig = f->matching_signature(&void_parameters); + if ((sig != NULL) && sig->is_defined) { + return sig; + } + } + + return NULL; +} + + +/** + * Combine a group of shaders for a single stage to generate a linked shader + * + * \note + * If this function is supplied a single shader, it is cloned, and the new + * shader is returned. 
+ */ +static struct gl_shader * +link_intrastage_shaders(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + /* Check that global variables defined in multiple shaders are consistent. + */ + if (!cross_validate_globals(prog, shader_list, num_shaders, false)) + return NULL; + + /* Check that there is only a single definition of each function signature + * across all shaders. + */ + for (unsigned i = 0; i < (num_shaders - 1); i++) { + foreach_list(node, shader_list[i]->ir) { + ir_function *const f = ((ir_instruction *) node)->as_function(); + + if (f == NULL) + continue; + + for (unsigned j = i + 1; j < num_shaders; j++) { + ir_function *const other = + shader_list[j]->symbols->get_function(f->name); + + /* If the other shader has no function (and therefore no function + * signatures) with the same name, skip to the next shader. + */ + if (other == NULL) + continue; + + foreach_iter (exec_list_iterator, iter, *f) { + ir_function_signature *sig = + (ir_function_signature *) iter.get(); + + if (!sig->is_defined || sig->is_built_in) + continue; + + ir_function_signature *other_sig = + other->exact_matching_signature(& sig->parameters); + + if ((other_sig != NULL) && other_sig->is_defined + && !other_sig->is_built_in) { + linker_error_printf(prog, + "function `%s' is multiply defined", + f->name); + return NULL; + } + } + } + } + } + + /* Find the shader that defines main, and make a clone of it. + * + * Starting with the clone, search for undefined references. If one is + * found, find the shader that defines it. Clone the reference and add + * it to the shader. Repeat until there are no undefined references or + * until a reference cannot be resolved. 
+ */ + gl_shader *main = NULL; + for (unsigned i = 0; i < num_shaders; i++) { + if (get_main_function_signature(shader_list[i]) != NULL) { + main = shader_list[i]; + break; + } + } + + if (main == NULL) { + linker_error_printf(prog, "%s shader lacks `main'\n", + (shader_list[0]->Type == GL_VERTEX_SHADER) + ? "vertex" : "fragment"); + return NULL; + } + + gl_shader *const linked = _mesa_new_shader(NULL, 0, main->Type); + linked->ir = new(linked) exec_list; + clone_ir_list(linked->ir, main->ir); + + populate_symbol_table(linked); + + /* The a pointer to the main function in the final linked shader (i.e., the + * copy of the original shader that contained the main function). + */ + ir_function_signature *const main_sig = get_main_function_signature(linked); + + /* Move any instructions other than variable declarations or function + * declarations into main. + */ + exec_node *insertion_point = + move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false, + linked); + + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == main) + continue; + + insertion_point = move_non_declarations(shader_list[i]->ir, + insertion_point, true, linked); + } + + /* Resolve initializers for global variables in the linked shader. 
+ */ + unsigned num_linking_shaders = num_shaders; + for (unsigned i = 0; i < num_shaders; i++) + num_linking_shaders += shader_list[i]->num_builtins_to_link; + + gl_shader **linking_shaders = + (gl_shader **) calloc(num_linking_shaders, sizeof(gl_shader *)); + + memcpy(linking_shaders, shader_list, + sizeof(linking_shaders[0]) * num_shaders); + + unsigned idx = num_shaders; + for (unsigned i = 0; i < num_shaders; i++) { + memcpy(&linking_shaders[idx], shader_list[i]->builtins_to_link, + sizeof(linking_shaders[0]) * shader_list[i]->num_builtins_to_link); + idx += shader_list[i]->num_builtins_to_link; + } + + assert(idx == num_linking_shaders); + + link_function_calls(prog, linked, linking_shaders, num_linking_shaders); + + free(linking_shaders); + + return linked; +} + + +struct uniform_node { + exec_node link; + struct gl_uniform *u; + unsigned slots; +}; + +void +assign_uniform_locations(struct gl_shader_program *prog) +{ + /* */ + exec_list uniforms; + unsigned total_uniforms = 0; + hash_table *ht = hash_table_ctor(32, hash_table_string_hash, + hash_table_string_compare); + + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + unsigned next_position = 0; + + foreach_list(node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_uniform)) + continue; + + const unsigned vec4_slots = (var->component_slots() + 3) / 4; + assert(vec4_slots != 0); + + uniform_node *n = (uniform_node *) hash_table_find(ht, var->name); + if (n == NULL) { + n = (uniform_node *) calloc(1, sizeof(struct uniform_node)); + n->u = (gl_uniform *) calloc(vec4_slots, sizeof(struct gl_uniform)); + n->slots = vec4_slots; + + n->u[0].Name = strdup(var->name); + for (unsigned j = 1; j < vec4_slots; j++) + n->u[j].Name = n->u[0].Name; + + hash_table_insert(ht, n, n->u[0].Name); + uniforms.push_tail(& n->link); + total_uniforms += vec4_slots; + } + + if (var->constant_value != NULL) + for (unsigned j = 
0; j < vec4_slots; j++) + n->u[j].Initialized = true; + + var->location = next_position; + + for (unsigned j = 0; j < vec4_slots; j++) { + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + n->u[j].VertPos = next_position; + break; + case GL_FRAGMENT_SHADER: + n->u[j].FragPos = next_position; + break; + case GL_GEOMETRY_SHADER: + /* FINISHME: Support geometry shaders. */ + assert(prog->_LinkedShaders[i]->Type != GL_GEOMETRY_SHADER); + break; + } + + next_position++; + } + } + } + + gl_uniform_list *ul = (gl_uniform_list *) + calloc(1, sizeof(gl_uniform_list)); + + ul->Size = total_uniforms; + ul->NumUniforms = total_uniforms; + ul->Uniforms = (gl_uniform *) calloc(total_uniforms, sizeof(gl_uniform)); + + unsigned idx = 0; + uniform_node *next; + for (uniform_node *node = (uniform_node *) uniforms.head + ; node->link.next != NULL + ; node = next) { + next = (uniform_node *) node->link.next; + + node->link.remove(); + memcpy(&ul->Uniforms[idx], node->u, sizeof(gl_uniform) * node->slots); + idx += node->slots; + + free(node->u); + free(node); + } + + hash_table_dtor(ht); + + prog->Uniforms = ul; +} + + +/** + * Find a contiguous set of available bits in a bitmask + * + * \param used_mask Bits representing used (1) and unused (0) locations + * \param needed_count Number of contiguous bits needed. + * + * \return + * Base location of the available bits on success or -1 on failure. + */ +int +find_available_slots(unsigned used_mask, unsigned needed_count) +{ + unsigned needed_mask = (1 << needed_count) - 1; + const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count; + + /* The comparison to 32 is redundant, but without it GCC emits "warning: + * cannot optimize possibly infinite loops" for the loop below. 
+ */ + if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32)) + return -1; + + for (int i = 0; i <= max_bit_to_test; i++) { + if ((needed_mask & ~used_mask) == needed_mask) + return i; + + needed_mask <<= 1; + } + + return -1; +} + + +bool +assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index) +{ + /* Mark invalid attribute locations as being used. + */ + unsigned used_locations = (max_attribute_index >= 32) + ? ~0 : ~((1 << max_attribute_index) - 1); + + gl_shader *const sh = prog->_LinkedShaders[0]; + assert(sh->Type == GL_VERTEX_SHADER); + + /* Operate in a total of four passes. + * + * 1. Invalidate the location assignments for all vertex shader inputs. + * + * 2. Assign locations for inputs that have user-defined (via + * glBindVertexAttribLocation) locatoins. + * + * 3. Sort the attributes without assigned locations by number of slots + * required in decreasing order. Fragmentation caused by attribute + * locations assigned by the application may prevent large attributes + * from having enough contiguous space. + * + * 4. Assign locations to any inputs without assigned locations. + */ + + invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0); + + if (prog->Attributes != NULL) { + for (unsigned i = 0; i < prog->Attributes->NumParameters; i++) { + ir_variable *const var = + sh->symbols->get_variable(prog->Attributes->Parameters[i].Name); + + /* Note: attributes that occupy multiple slots, such as arrays or + * matrices, may appear in the attrib array multiple times. + */ + if ((var == NULL) || (var->location != -1)) + continue; + + /* From page 61 of the OpenGL 4.0 spec: + * + * "LinkProgram will fail if the attribute bindings assigned by + * BindAttribLocation do not leave not enough space to assign a + * location for an active matrix attribute or an active attribute + * array, both of which require multiple contiguous generic + * attributes." 
+ * + * Previous versions of the spec contain similar language but omit the + * bit about attribute arrays. + * + * Page 61 of the OpenGL 4.0 spec also says: + * + * "It is possible for an application to bind more than one + * attribute name to the same location. This is referred to as + * aliasing. This will only work if only one of the aliased + * attributes is active in the executable program, or if no path + * through the shader consumes more than one attribute of a set + * of attributes aliased to the same location. A link error can + * occur if the linker determines that every path through the + * shader consumes multiple aliased attributes, but + * implementations are not required to generate an error in this + * case." + * + * These two paragraphs are either somewhat contradictory, or I don't + * fully understand one or both of them. + */ + /* FINISHME: The code as currently written does not support attribute + * FINISHME: location aliasing (see comment above). + */ + const int attr = prog->Attributes->Parameters[i].StateIndexes[0]; + const unsigned slots = count_attribute_slots(var->type); + + /* Mask representing the contiguous slots that will be used by this + * attribute. + */ + const unsigned use_mask = (1 << slots) - 1; + + /* Generate a link error if the set of bits requested for this + * attribute overlaps any previously allocated bits. + */ + if ((~(use_mask << attr) & used_locations) != used_locations) { + linker_error_printf(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + var->name); + return false; + } + + var->location = VERT_ATTRIB_GENERIC0 + attr; + used_locations |= (use_mask << attr); + } + } + + /* Temporary storage for the set of attributes that need locations assigned. + */ + struct temp_attr { + unsigned slots; + ir_variable *var; + + /* Used below in the call to qsort. 
*/ + static int compare(const void *a, const void *b) + { + const temp_attr *const l = (const temp_attr *) a; + const temp_attr *const r = (const temp_attr *) b; + + /* Reversed because we want a descending order sort below. */ + return r->slots - l->slots; + } + } to_assign[16]; + + unsigned num_attr = 0; + + foreach_list(node, sh->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_in)) + continue; + + /* The location was explicitly assigned, nothing to do here. + */ + if (var->location != -1) + continue; + + to_assign[num_attr].slots = count_attribute_slots(var->type); + to_assign[num_attr].var = var; + num_attr++; + } + + /* If all of the attributes were assigned locations by the application (or + * are built-in attributes with fixed locations), return early. This should + * be the common case. + */ + if (num_attr == 0) + return true; + + qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare); + + /* VERT_ATTRIB_GENERIC0 is a psdueo-alias for VERT_ATTRIB_POS. It can only + * be explicitly assigned by via glBindAttribLocation. Mark it as reserved + * to prevent it from being automatically allocated below. + */ + used_locations |= (1 << 0); + + for (unsigned i = 0; i < num_attr; i++) { + /* Mask representing the contiguous slots that will be used by this + * attribute. + */ + const unsigned use_mask = (1 << to_assign[i].slots) - 1; + + int location = find_available_slots(used_locations, to_assign[i].slots); + + if (location < 0) { + linker_error_printf(prog, + "insufficient contiguous attribute locations " + "available for vertex shader input `%s'", + to_assign[i].var->name); + return false; + } + + to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location; + used_locations |= (use_mask << location); + } + + return true; +} + + +void +assign_varying_locations(gl_shader *producer, gl_shader *consumer) +{ + /* FINISHME: Set dynamically when geometry shader support is added. 
*/ + unsigned output_index = VERT_RESULT_VAR0; + unsigned input_index = FRAG_ATTRIB_VAR0; + + /* Operate in a total of three passes. + * + * 1. Assign locations for any matching inputs and outputs. + * + * 2. Mark output variables in the producer that do not have locations as + * not being outputs. This lets the optimizer eliminate them. + * + * 3. Mark input variables in the consumer that do not have locations as + * not being inputs. This lets the optimizer eliminate them. + */ + + invalidate_variable_locations(producer, ir_var_out, VERT_RESULT_VAR0); + invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0); + + foreach_list(node, producer->ir) { + ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); + + if ((output_var == NULL) || (output_var->mode != ir_var_out) + || (output_var->location != -1)) + continue; + + ir_variable *const input_var = + consumer->symbols->get_variable(output_var->name); + + if ((input_var == NULL) || (input_var->mode != ir_var_in)) + continue; + + assert(input_var->location == -1); + + /* FINISHME: Location assignment will need some changes when arrays, + * FINISHME: matrices, and structures are allowed as shader inputs / + * FINISHME: outputs. + */ + output_var->location = output_index; + input_var->location = input_index; + + output_index++; + input_index++; + } + + foreach_list(node, producer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_out)) + continue; + + /* An 'out' variable is only really a shader output if its value is read + * by the following stage. + */ + if (var->location == -1) { + var->shader_out = false; + var->mode = ir_var_auto; + } + } + + foreach_list(node, consumer->ir) { + ir_variable *const var = ((ir_instruction *) node)->as_variable(); + + if ((var == NULL) || (var->mode != ir_var_in)) + continue; + + /* An 'in' variable is only really a shader input if its value is written + * by the previous stage. 
+ */ + var->shader_in = (var->location != -1); + } +} + + +void +link_shaders(struct gl_shader_program *prog) +{ + prog->LinkStatus = false; + prog->Validated = false; + prog->_Used = false; + + if (prog->InfoLog != NULL) + talloc_free(prog->InfoLog); + + prog->InfoLog = talloc_strdup(NULL, ""); + + /* Separate the shaders into groups based on their type. + */ + struct gl_shader **vert_shader_list; + unsigned num_vert_shaders = 0; + struct gl_shader **frag_shader_list; + unsigned num_frag_shaders = 0; + + vert_shader_list = (struct gl_shader **) + calloc(2 * prog->NumShaders, sizeof(struct gl_shader *)); + frag_shader_list = &vert_shader_list[prog->NumShaders]; + + unsigned min_version = UINT_MAX; + unsigned max_version = 0; + for (unsigned i = 0; i < prog->NumShaders; i++) { + min_version = MIN2(min_version, prog->Shaders[i]->Version); + max_version = MAX2(max_version, prog->Shaders[i]->Version); + + switch (prog->Shaders[i]->Type) { + case GL_VERTEX_SHADER: + vert_shader_list[num_vert_shaders] = prog->Shaders[i]; + num_vert_shaders++; + break; + case GL_FRAGMENT_SHADER: + frag_shader_list[num_frag_shaders] = prog->Shaders[i]; + num_frag_shaders++; + break; + case GL_GEOMETRY_SHADER: + /* FINISHME: Support geometry shaders. */ + assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER); + break; + } + } + + /* Previous to GLSL version 1.30, different compilation units could mix and + * match shading language versions. With GLSL 1.30 and later, the versions + * of all shaders must match. + */ + assert(min_version >= 110); + assert(max_version <= 130); + if ((max_version >= 130) && (min_version != max_version)) { + linker_error_printf(prog, "all shaders must use same shading " + "language version\n"); + goto done; + } + + prog->Version = max_version; + + /* Link all shaders for a particular stage and validate the result. 
+ */ + prog->_NumLinkedShaders = 0; + if (num_vert_shaders > 0) { + gl_shader *const sh = + link_intrastage_shaders(prog, vert_shader_list, num_vert_shaders); + + if (sh == NULL) + goto done; + + if (!validate_vertex_shader_executable(prog, sh)) + goto done; + + prog->_LinkedShaders[prog->_NumLinkedShaders] = sh; + prog->_NumLinkedShaders++; + } + + if (num_frag_shaders > 0) { + gl_shader *const sh = + link_intrastage_shaders(prog, frag_shader_list, num_frag_shaders); + + if (sh == NULL) + goto done; + + if (!validate_fragment_shader_executable(prog, sh)) + goto done; + + prog->_LinkedShaders[prog->_NumLinkedShaders] = sh; + prog->_NumLinkedShaders++; + } + + /* Here begins the inter-stage linking phase. Some initial validation is + * performed, then locations are assigned for uniforms, attributes, and + * varyings. + */ + if (cross_validate_uniforms(prog)) { + /* Validate the inputs of each stage with the output of the preceeding + * stage. + */ + for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) { + if (!cross_validate_outputs_to_inputs(prog, + prog->_LinkedShaders[i - 1], + prog->_LinkedShaders[i])) + goto done; + } + + prog->LinkStatus = true; + } + + /* FINISHME: Perform whole-program optimization here. 
*/ + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + /* Optimization passes */ + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + + /* Lowering */ + do_mat_op_to_vec(ir); + do_mod_to_fract(ir); + do_div_to_mul_rcp(ir); + + do { + progress = false; + + progress = do_function_inlining(ir) || progress; + progress = do_if_simplification(ir) || progress; + progress = do_copy_propagation(ir) || progress; + progress = do_dead_code_local(ir) || progress; +#if 0 + progress = do_dead_code_unlinked(state, ir) || progress; +#endif + progress = do_constant_variable_unlinked(ir) || progress; + progress = do_constant_folding(ir) || progress; + progress = do_if_return(ir) || progress; +#if 0 + if (ctx->Shader.EmitNoIfs) + progress = do_if_to_cond_assign(ir) || progress; +#endif + + progress = do_vec_index_to_swizzle(ir) || progress; + /* Do this one after the previous to let the easier pass handle + * constant vector indexing. + */ + progress = do_vec_index_to_cond_assign(ir) || progress; + + progress = do_swizzle_swizzle(ir) || progress; + } while (progress); + } + + assign_uniform_locations(prog); + + if (prog->_LinkedShaders[0]->Type == GL_VERTEX_SHADER) + /* FINISHME: The value of the max_attribute_index parameter is + * FINISHME: implementation dependent based on the value of + * FINISHME: GL_MAX_VERTEX_ATTRIBS. GL_MAX_VERTEX_ATTRIBS must be + * FINISHME: at least 16, so hardcode 16 for now. + */ + if (!assign_attribute_locations(prog, 16)) + goto done; + + for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) + assign_varying_locations(prog->_LinkedShaders[i - 1], + prog->_LinkedShaders[i]); + + /* FINISHME: Assign fragment shader output locations. */ + +done: + free(vert_shader_list); +} diff --cc src/glsl/program.h index bb1cd919cd6,00000000000..0a49203d4b2 mode 100644,000000..100644 --- a/src/glsl/program.h +++ b/src/glsl/program.h @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "main/mtypes.h" + +extern "C" { - #include "shader/prog_parameter.h" - #include "shader/prog_uniform.h" ++#include "program/prog_parameter.h" ++#include "program/prog_uniform.h" +} + +extern void +link_shaders(struct gl_shader_program *prog); diff --cc src/mesa/main/shaderapi.c index 00000000000,9cb2391035d..89b9557e84f mode 000000,100644..100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@@ -1,0 -1,1638 +1,1640 @@@ + /* + * Mesa 3-D graphics library + * + * Copyright (C) 2004-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009-2010 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + /** + * \file shaderapi.c + * \author Brian Paul + * + * Implementation of GLSL-related API functions. + * The glUniform* functions are in uniforms.c + * + * + * XXX things to do: + * 1. Check that the right error code is generated for all _mesa_error() calls. + * 2. Insert FLUSH_VERTICES calls in various places + */ + + + #include "main/glheader.h" + #include "main/context.h" + #include "main/dispatch.h" + #include "main/enums.h" + #include "main/hash.h" + #include "main/shaderapi.h" + #include "main/shaderobj.h" + #include "program/program.h" + #include "program/prog_parameter.h" + #include "program/prog_uniform.h" + #include "slang/slang_compile.h" + #include "slang/slang_link.h" ++#include "talloc.h" + + + /** Define this to enable shader substitution (see below) */ + #define SHADER_SUBST 0 + + + /** + * Return mask of GLSL_x flags by examining the MESA_GLSL env var. 
+ */ + static GLbitfield + get_shader_flags(void) + { + GLbitfield flags = 0x0; + const char *env = _mesa_getenv("MESA_GLSL"); + + if (env) { + if (strstr(env, "dump")) + flags |= GLSL_DUMP; + if (strstr(env, "log")) + flags |= GLSL_LOG; + if (strstr(env, "nopvert")) + flags |= GLSL_NOP_VERT; + if (strstr(env, "nopfrag")) + flags |= GLSL_NOP_FRAG; + if (strstr(env, "nopt")) + flags |= GLSL_NO_OPT; + else if (strstr(env, "opt")) + flags |= GLSL_OPT; + if (strstr(env, "uniform")) + flags |= GLSL_UNIFORMS; + if (strstr(env, "useprog")) + flags |= GLSL_USE_PROG; + } + + return flags; + } + + + /** + * Initialize context's shader state. + */ + void + _mesa_init_shader_state(GLcontext *ctx) + { + /* Device drivers may override these to control what kind of instructions + * are generated by the GLSL compiler. + */ + ctx->Shader.EmitHighLevelInstructions = GL_TRUE; + ctx->Shader.EmitContReturn = GL_TRUE; + ctx->Shader.EmitCondCodes = GL_FALSE; + ctx->Shader.EmitComments = GL_FALSE; ++ ctx->Shader.EmitNoIfs = GL_FALSE; + ctx->Shader.Flags = get_shader_flags(); + + /* Default pragma settings */ + ctx->Shader.DefaultPragmas.IgnoreOptimize = GL_FALSE; + ctx->Shader.DefaultPragmas.IgnoreDebug = GL_FALSE; + ctx->Shader.DefaultPragmas.Optimize = GL_TRUE; + ctx->Shader.DefaultPragmas.Debug = GL_FALSE; + } + + + /** + * Free the per-context shader-related state. + */ + void + _mesa_free_shader_state(GLcontext *ctx) + { + _mesa_reference_shader_program(ctx, &ctx->Shader.CurrentProgram, NULL); + } + + + /** + * Return the size of the given GLSL datatype, in floats (components). 
+ */ + GLint + _mesa_sizeof_glsl_type(GLenum type) + { + switch (type) { + case GL_FLOAT: + case GL_INT: + case GL_BOOL: + case GL_SAMPLER_1D: + case GL_SAMPLER_2D: + case GL_SAMPLER_3D: + case GL_SAMPLER_CUBE: + case GL_SAMPLER_1D_SHADOW: + case GL_SAMPLER_2D_SHADOW: + case GL_SAMPLER_2D_RECT_ARB: + case GL_SAMPLER_2D_RECT_SHADOW_ARB: + case GL_SAMPLER_1D_ARRAY_EXT: + case GL_SAMPLER_2D_ARRAY_EXT: + case GL_SAMPLER_1D_ARRAY_SHADOW_EXT: + case GL_SAMPLER_2D_ARRAY_SHADOW_EXT: + case GL_SAMPLER_CUBE_SHADOW_EXT: + return 1; + case GL_FLOAT_VEC2: + case GL_INT_VEC2: + case GL_UNSIGNED_INT_VEC2: + case GL_BOOL_VEC2: + return 2; + case GL_FLOAT_VEC3: + case GL_INT_VEC3: + case GL_UNSIGNED_INT_VEC3: + case GL_BOOL_VEC3: + return 3; + case GL_FLOAT_VEC4: + case GL_INT_VEC4: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL_VEC4: + return 4; + case GL_FLOAT_MAT2: + case GL_FLOAT_MAT2x3: + case GL_FLOAT_MAT2x4: + return 8; /* two float[4] vectors */ + case GL_FLOAT_MAT3: + case GL_FLOAT_MAT3x2: + case GL_FLOAT_MAT3x4: + return 12; /* three float[4] vectors */ + case GL_FLOAT_MAT4: + case GL_FLOAT_MAT4x2: + case GL_FLOAT_MAT4x3: + return 16; /* four float[4] vectors */ + default: + _mesa_problem(NULL, "Invalid type in _mesa_sizeof_glsl_type()"); + return 1; + } + } + + + /** + * Copy string from to , up to maxLength characters, returning + * length of in . + * \param src the strings source + * \param maxLength max chars to copy + * \param length returns number of chars copied + * \param dst the string destination + */ + void + _mesa_copy_string(GLchar *dst, GLsizei maxLength, + GLsizei *length, const GLchar *src) + { + GLsizei len; + for (len = 0; len < maxLength - 1 && src && src[len]; len++) + dst[len] = src[len]; + if (maxLength > 0) + dst[len] = 0; + if (length) + *length = len; + } + + + + /** + * Find the length of the longest transform feedback varying name + * which was specified with glTransformFeedbackVaryings(). 
+ */ + static GLint + longest_feedback_varying_name(const struct gl_shader_program *shProg) + { + GLuint i; + GLint max = 0; + for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { + GLint len = strlen(shProg->TransformFeedback.VaryingNames[i]); + if (len > max) + max = len; + } + return max; + } + + + + static GLboolean + is_program(GLcontext *ctx, GLuint name) + { + struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, name); + return shProg ? GL_TRUE : GL_FALSE; + } + + + static GLboolean + is_shader(GLcontext *ctx, GLuint name) + { + struct gl_shader *shader = _mesa_lookup_shader(ctx, name); + return shader ? GL_TRUE : GL_FALSE; + } + + + /** + * Attach shader to a shader program. + */ + static void + attach_shader(GLcontext *ctx, GLuint program, GLuint shader) + { + struct gl_shader_program *shProg; + struct gl_shader *sh; + GLuint i, n; + + shProg = _mesa_lookup_shader_program_err(ctx, program, "glAttachShader"); + if (!shProg) + return; + + sh = _mesa_lookup_shader_err(ctx, shader, "glAttachShader"); + if (!sh) { + return; + } + + n = shProg->NumShaders; + for (i = 0; i < n; i++) { + if (shProg->Shaders[i] == sh) { + /* The shader is already attched to this program. The + * GL_ARB_shader_objects spec says: + * + * "The error INVALID_OPERATION is generated by AttachObjectARB + * if is already attached to ." 
+ */ + _mesa_error(ctx, GL_INVALID_OPERATION, "glAttachShader"); + return; + } + } + + /* grow list */ + shProg->Shaders = (struct gl_shader **) + _mesa_realloc(shProg->Shaders, + n * sizeof(struct gl_shader *), + (n + 1) * sizeof(struct gl_shader *)); + if (!shProg->Shaders) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glAttachShader"); + return; + } + + /* append */ + shProg->Shaders[n] = NULL; /* since realloc() didn't zero the new space */ + _mesa_reference_shader(ctx, &shProg->Shaders[n], sh); + shProg->NumShaders++; + } + + + static GLint + get_attrib_location(GLcontext *ctx, GLuint program, const GLchar *name) + { + struct gl_shader_program *shProg + = _mesa_lookup_shader_program_err(ctx, program, "glGetAttribLocation"); + + if (!shProg) { + return -1; + } + + if (!shProg->LinkStatus) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetAttribLocation(program not linked)"); + return -1; + } + + if (!name) + return -1; + + if (shProg->VertexProgram) { + const struct gl_program_parameter_list *attribs = + shProg->VertexProgram->Base.Attributes; + if (attribs) { + GLint i = _mesa_lookup_parameter_index(attribs, -1, name); + if (i >= 0) { + return attribs->Parameters[i].StateIndexes[0]; + } + } + } + return -1; + } + + + static void + bind_attrib_location(GLcontext *ctx, GLuint program, GLuint index, + const GLchar *name) + { + struct gl_shader_program *shProg; + const GLint size = -1; /* unknown size */ + GLint i, oldIndex; + GLenum datatype = GL_FLOAT_VEC4; + + shProg = _mesa_lookup_shader_program_err(ctx, program, + "glBindAttribLocation"); + if (!shProg) { + return; + } + + if (!name) + return; + + if (strncmp(name, "gl_", 3) == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBindAttribLocation(illegal name)"); + return; + } + + if (index >= ctx->Const.VertexProgram.MaxAttribs) { + _mesa_error(ctx, GL_INVALID_VALUE, "glBindAttribLocation(index)"); + return; + } + + if (shProg->LinkStatus) { + /* get current index/location for the attribute */ + oldIndex = 
get_attrib_location(ctx, program, name); + } + else { + oldIndex = -1; + } + + /* this will replace the current value if it's already in the list */ + i = _mesa_add_attribute(shProg->Attributes, name, size, datatype, index); + if (i < 0) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindAttribLocation"); + return; + } + + /* + * Note that this attribute binding won't go into effect until + * glLinkProgram is called again. + */ + } + + + static GLuint + create_shader(GLcontext *ctx, GLenum type) + { + struct gl_shader *sh; + GLuint name; + + name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1); + + switch (type) { + case GL_FRAGMENT_SHADER: + case GL_VERTEX_SHADER: + case GL_GEOMETRY_SHADER_ARB: + sh = ctx->Driver.NewShader(ctx, name, type); + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "CreateShader(type)"); + return 0; + } + + _mesa_HashInsert(ctx->Shared->ShaderObjects, name, sh); + + return name; + } + + + static GLuint + create_shader_program(GLcontext *ctx) + { + GLuint name; + struct gl_shader_program *shProg; + + name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1); + + shProg = ctx->Driver.NewShaderProgram(ctx, name); + + _mesa_HashInsert(ctx->Shared->ShaderObjects, name, shProg); + + assert(shProg->RefCount == 1); + + return name; + } + + + /** + * Named w/ "2" to indicate OpenGL 2.x vs GL_ARB_fragment_programs's + * DeleteProgramARB. + */ + static void + delete_shader_program(GLcontext *ctx, GLuint name) + { + /* + * NOTE: deleting shaders/programs works a bit differently than + * texture objects (and buffer objects, etc). Shader/program + * handles/IDs exist in the hash table until the object is really + * deleted (refcount==0). With texture objects, the handle/ID is + * removed from the hash table in glDeleteTextures() while the tex + * object itself might linger until its refcount goes to zero. 
+ */ + struct gl_shader_program *shProg; + + shProg = _mesa_lookup_shader_program_err(ctx, name, "glDeleteProgram"); + if (!shProg) + return; + + shProg->DeletePending = GL_TRUE; + + /* effectively, decr shProg's refcount */ + _mesa_reference_shader_program(ctx, &shProg, NULL); + } + + + static void + delete_shader(GLcontext *ctx, GLuint shader) + { + struct gl_shader *sh; + + sh = _mesa_lookup_shader_err(ctx, shader, "glDeleteShader"); + if (!sh) + return; + + sh->DeletePending = GL_TRUE; + + /* effectively, decr sh's refcount */ + _mesa_reference_shader(ctx, &sh, NULL); + } + + + static void + detach_shader(GLcontext *ctx, GLuint program, GLuint shader) + { + struct gl_shader_program *shProg; + GLuint n; + GLuint i, j; + + shProg = _mesa_lookup_shader_program_err(ctx, program, "glDetachShader"); + if (!shProg) + return; + + n = shProg->NumShaders; + + for (i = 0; i < n; i++) { + if (shProg->Shaders[i]->Name == shader) { + /* found it */ + struct gl_shader **newList; + + /* release */ + _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL); + + /* alloc new, smaller array */ + newList = (struct gl_shader **) + malloc((n - 1) * sizeof(struct gl_shader *)); + if (!newList) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDetachShader"); + return; + } + for (j = 0; j < i; j++) { + newList[j] = shProg->Shaders[j]; + } + while (++i < n) + newList[j++] = shProg->Shaders[i]; + free(shProg->Shaders); + + shProg->Shaders = newList; + shProg->NumShaders = n - 1; + + #ifdef DEBUG + /* sanity check */ + { + for (j = 0; j < shProg->NumShaders; j++) { + assert(shProg->Shaders[j]->Type == GL_VERTEX_SHADER || + shProg->Shaders[j]->Type == GL_FRAGMENT_SHADER); + assert(shProg->Shaders[j]->RefCount > 0); + } + } + #endif + + return; + } + } + + /* not found */ + { + GLenum err; + if (is_shader(ctx, shader)) + err = GL_INVALID_OPERATION; + else if (is_program(ctx, shader)) + err = GL_INVALID_OPERATION; + else + err = GL_INVALID_VALUE; + _mesa_error(ctx, err, "glDetachProgram(shader)"); + 
return; + } + } + + + static void + get_active_attrib(GLcontext *ctx, GLuint program, GLuint index, + GLsizei maxLength, GLsizei *length, GLint *size, + GLenum *type, GLchar *nameOut) + { + const struct gl_program_parameter_list *attribs = NULL; + struct gl_shader_program *shProg; + + shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveAttrib"); + if (!shProg) + return; + + if (shProg->VertexProgram) + attribs = shProg->VertexProgram->Base.Attributes; + + if (!attribs || index >= attribs->NumParameters) { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveAttrib(index)"); + return; + } + + _mesa_copy_string(nameOut, maxLength, length, + attribs->Parameters[index].Name); + + if (size) + *size = attribs->Parameters[index].Size + / _mesa_sizeof_glsl_type(attribs->Parameters[index].DataType); + + if (type) + *type = attribs->Parameters[index].DataType; + } + + + /** + * Return list of shaders attached to shader program. + */ + static void + get_attached_shaders(GLcontext *ctx, GLuint program, GLsizei maxCount, + GLsizei *count, GLuint *obj) + { + struct gl_shader_program *shProg = + _mesa_lookup_shader_program_err(ctx, program, "glGetAttachedShaders"); + if (shProg) { + GLuint i; + for (i = 0; i < (GLuint) maxCount && i < shProg->NumShaders; i++) { + obj[i] = shProg->Shaders[i]->Name; + } + if (count) + *count = i; + } + } + + + /** + * glGetHandleARB() - return ID/name of currently bound shader program. + */ + static GLuint + get_handle(GLcontext *ctx, GLenum pname) + { + if (pname == GL_PROGRAM_OBJECT_ARB) { + if (ctx->Shader.CurrentProgram) + return ctx->Shader.CurrentProgram->Name; + else + return 0; + } + else { + _mesa_error(ctx, GL_INVALID_ENUM, "glGetHandleARB"); + return 0; + } + } + + + /** + * glGetProgramiv() - get shader program state. + * Note that this is for GLSL shader programs, not ARB vertex/fragment + * programs (see glGetProgramivARB). 
+ */ + static void + get_programiv(GLcontext *ctx, GLuint program, GLenum pname, GLint *params) + { + const struct gl_program_parameter_list *attribs; + struct gl_shader_program *shProg + = _mesa_lookup_shader_program(ctx, program); + + if (!shProg) { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramiv(program)"); + return; + } + + if (shProg->VertexProgram) + attribs = shProg->VertexProgram->Base.Attributes; + else + attribs = NULL; + + switch (pname) { + case GL_DELETE_STATUS: + *params = shProg->DeletePending; + break; + case GL_LINK_STATUS: + *params = shProg->LinkStatus; + break; + case GL_VALIDATE_STATUS: + *params = shProg->Validated; + break; + case GL_INFO_LOG_LENGTH: + *params = shProg->InfoLog ? strlen(shProg->InfoLog) + 1 : 0; + break; + case GL_ATTACHED_SHADERS: + *params = shProg->NumShaders; + break; + case GL_ACTIVE_ATTRIBUTES: + *params = attribs ? attribs->NumParameters : 0; + break; + case GL_ACTIVE_ATTRIBUTE_MAX_LENGTH: + *params = _mesa_longest_parameter_name(attribs, PROGRAM_INPUT) + 1; + break; + case GL_ACTIVE_UNIFORMS: + *params = shProg->Uniforms ? 
shProg->Uniforms->NumUniforms : 0; + break; + case GL_ACTIVE_UNIFORM_MAX_LENGTH: + *params = _mesa_longest_uniform_name(shProg->Uniforms); + if (*params > 0) + (*params)++; /* add one for terminating zero */ + break; + case GL_PROGRAM_BINARY_LENGTH_OES: + *params = 0; + break; + #if FEATURE_EXT_transform_feedback + case GL_TRANSFORM_FEEDBACK_VARYINGS: + *params = shProg->TransformFeedback.NumVarying; + break; + case GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH: + *params = longest_feedback_varying_name(shProg) + 1; + break; + case GL_TRANSFORM_FEEDBACK_BUFFER_MODE: + *params = shProg->TransformFeedback.BufferMode; + break; + #endif + #if FEATURE_ARB_geometry_shader4 + case GL_GEOMETRY_VERTICES_OUT_ARB: + *params = shProg->Geom.VerticesOut; + break; + case GL_GEOMETRY_INPUT_TYPE_ARB: + *params = shProg->Geom.InputType; + break; + case GL_GEOMETRY_OUTPUT_TYPE_ARB: + *params = shProg->Geom.OutputType; + break; + #endif + default: + _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramiv(pname)"); + return; + } + } + + + /** + * glGetShaderiv() - get GLSL shader state + */ + static void + get_shaderiv(GLcontext *ctx, GLuint name, GLenum pname, GLint *params) + { + struct gl_shader *shader = + _mesa_lookup_shader_err(ctx, name, "glGetShaderiv"); + + if (!shader) { + return; + } + + switch (pname) { + case GL_SHADER_TYPE: + *params = shader->Type; + break; + case GL_DELETE_STATUS: + *params = shader->DeletePending; + break; + case GL_COMPILE_STATUS: + *params = shader->CompileStatus; + break; + case GL_INFO_LOG_LENGTH: + *params = shader->InfoLog ? strlen(shader->InfoLog) + 1 : 0; + break; + case GL_SHADER_SOURCE_LENGTH: + *params = shader->Source ? 
strlen((char *) shader->Source) + 1 : 0; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "glGetShaderiv(pname)"); + return; + } + } + + + static void + get_program_info_log(GLcontext *ctx, GLuint program, GLsizei bufSize, + GLsizei *length, GLchar *infoLog) + { + struct gl_shader_program *shProg + = _mesa_lookup_shader_program(ctx, program); + if (!shProg) { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramInfoLog(program)"); + return; + } + _mesa_copy_string(infoLog, bufSize, length, shProg->InfoLog); + } + + + static void + get_shader_info_log(GLcontext *ctx, GLuint shader, GLsizei bufSize, + GLsizei *length, GLchar *infoLog) + { + struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); + if (!sh) { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetShaderInfoLog(shader)"); + return; + } + _mesa_copy_string(infoLog, bufSize, length, sh->InfoLog); + } + + + /** + * Return shader source code. + */ + static void + get_shader_source(GLcontext *ctx, GLuint shader, GLsizei maxLength, + GLsizei *length, GLchar *sourceOut) + { + struct gl_shader *sh; + sh = _mesa_lookup_shader_err(ctx, shader, "glGetShaderSource"); + if (!sh) { + return; + } + _mesa_copy_string(sourceOut, maxLength, length, sh->Source); + } + + + /** + * Set/replace shader source code. + */ + static void + shader_source(GLcontext *ctx, GLuint shader, const GLchar *source) + { + struct gl_shader *sh; + + sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource"); + if (!sh) + return; + + /* free old shader source string and install new one */ + if (sh->Source) { + free((void *) sh->Source); + } + sh->Source = source; + sh->CompileStatus = GL_FALSE; + #ifdef DEBUG + sh->SourceChecksum = _mesa_str_checksum(sh->Source); + #endif + } + + + /** + * Compile a shader. 
+ */ + static void + compile_shader(GLcontext *ctx, GLuint shaderObj) + { + struct gl_shader *sh; + + sh = _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader"); + if (!sh) + return; + + /* set default pragma state for shader */ + sh->Pragmas = ctx->Shader.DefaultPragmas; + + /* this call will set the sh->CompileStatus field to indicate if + * compilation was successful. + */ - (void) _slang_compile(ctx, sh); ++ _mesa_glsl_compile_shader(ctx, sh); + } + + + /** + * Link a program's shaders. + * Records GL_INVALID_OPERATION and returns without linking when the current + * transform feedback object is active and the program is the current program. + */ + static void + link_program(GLcontext *ctx, GLuint program) + { + struct gl_shader_program *shProg; + struct gl_transform_feedback_object *obj = + ctx->TransformFeedback.CurrentObject; + + shProg = _mesa_lookup_shader_program_err(ctx, program, "glLinkProgram"); + if (!shProg) + return; + + if (obj->Active && shProg == ctx->Shader.CurrentProgram) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glLinkProgram(transform feedback active)"); + return; + } + + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + - _slang_link(ctx, program, shProg); ++ _mesa_glsl_link_shader(ctx, shProg); + + /* debug code */ + if (0) { + GLuint i; + + printf("Link %u shaders in program %u: %s\n", + shProg->NumShaders, shProg->Name, + shProg->LinkStatus ? "Success" : "Failed"); + + for (i = 0; i < shProg->NumShaders; i++) { + printf(" shader %u, type 0x%x\n", + shProg->Shaders[i]->Name, + shProg->Shaders[i]->Type); + } + } + } + + + /** + * Print basic shader info (for debug).
+ */ + static void + print_shader_info(const struct gl_shader_program *shProg) + { + GLuint i; + + printf("Mesa: glUseProgram(%u)\n", shProg->Name); + for (i = 0; i < shProg->NumShaders; i++) { + const char *s; + switch (shProg->Shaders[i]->Type) { + case GL_VERTEX_SHADER: + s = "vertex"; + break; + case GL_FRAGMENT_SHADER: + s = "fragment"; + break; + case GL_GEOMETRY_SHADER: + s = "geometry"; + break; + default: + s = ""; + } + printf(" %s shader %u, checksum %u\n", s, + shProg->Shaders[i]->Name, + shProg->Shaders[i]->SourceChecksum); + } + if (shProg->VertexProgram) + printf(" vert prog %u\n", shProg->VertexProgram->Base.Id); + if (shProg->FragmentProgram) + printf(" frag prog %u\n", shProg->FragmentProgram->Base.Id); + } + + + /** + * Use the named shader program for subsequent rendering. + */ + void + _mesa_use_program(GLcontext *ctx, GLuint program) + { + struct gl_shader_program *shProg; + struct gl_transform_feedback_object *obj = + ctx->TransformFeedback.CurrentObject; + + if (obj->Active) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glUseProgram(transform feedback active)"); + return; + } + + if (ctx->Shader.CurrentProgram && + ctx->Shader.CurrentProgram->Name == program) { + /* no-op */ + return; + } + + if (program) { + shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram"); + if (!shProg) { + return; + } + if (!shProg->LinkStatus) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glUseProgram(program %u not linked)", program); + return; + } + + /* debug code */ + if (ctx->Shader.Flags & GLSL_USE_PROG) { + print_shader_info(shProg); + } + } + else { + shProg = NULL; + } + + if (ctx->Shader.CurrentProgram != shProg) { + FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + _mesa_reference_shader_program(ctx, &ctx->Shader.CurrentProgram, shProg); + } + + if (ctx->Driver.UseProgram) + ctx->Driver.UseProgram(ctx, shProg); + } + + + /** + * Validate a program's samplers. 
+ * Specifically, check that there aren't two samplers of different types + * pointing to the same texture unit. + * \return GL_TRUE if valid, GL_FALSE if invalid + */ + static GLboolean + validate_samplers(GLcontext *ctx, const struct gl_program *prog, char *errMsg) + { + static const char *targetName[] = { + "TEXTURE_2D_ARRAY", + "TEXTURE_1D_ARRAY", + "TEXTURE_CUBE", + "TEXTURE_3D", + "TEXTURE_RECT", + "TEXTURE_2D", + "TEXTURE_1D", + }; + GLint targetUsed[MAX_TEXTURE_IMAGE_UNITS]; + GLbitfield samplersUsed = prog->SamplersUsed; + GLuint i; + + assert(Elements(targetName) == NUM_TEXTURE_TARGETS); + + if (samplersUsed == 0x0) + return GL_TRUE; + + for (i = 0; i < Elements(targetUsed); i++) + targetUsed[i] = -1; + + /* walk over bits which are set in 'samplers' */ + while (samplersUsed) { + GLuint unit; + gl_texture_index target; + GLint sampler = _mesa_ffs(samplersUsed) - 1; + assert(sampler >= 0); + assert(sampler < MAX_TEXTURE_IMAGE_UNITS); + unit = prog->SamplerUnits[sampler]; + target = prog->SamplerTargets[sampler]; + if (targetUsed[unit] != -1 && targetUsed[unit] != target) { + _mesa_snprintf(errMsg, 100, + "Texture unit %d is accessed both as %s and %s", + unit, targetName[targetUsed[unit]], targetName[target]); + return GL_FALSE; + } + targetUsed[unit] = target; + samplersUsed ^= (1 << sampler); + } + + return GL_TRUE; + } + + + /** + * Do validation of the given shader program. + * \param errMsg returns error message if validation fails. 
+ * \return GL_TRUE if valid, GL_FALSE if invalid (and set errMsg) + */ + static GLboolean + validate_shader_program(GLcontext *ctx, + const struct gl_shader_program *shProg, + char *errMsg) + { + const struct gl_vertex_program *vp = shProg->VertexProgram; + const struct gl_fragment_program *fp = shProg->FragmentProgram; + + if (!shProg->LinkStatus) { + return GL_FALSE; + } + + /* From the GL spec, a program is invalid if any of these are true: + + any two active samplers in the current program object are of + different types, but refer to the same texture image unit, + + any active sampler in the current program object refers to a texture + image unit where fixed-function fragment processing accesses a + texture target that does not match the sampler type, or + + the sum of the number of active samplers in the program and the + number of texture image units enabled for fixed-function fragment + processing exceeds the combined limit on the total number of texture + image units allowed. + */ + + + /* + * Check: any two active samplers in the current program object are of + * different types, but refer to the same texture image unit, + */ + if (vp && !validate_samplers(ctx, &vp->Base, errMsg)) { + return GL_FALSE; + } + if (fp && !validate_samplers(ctx, &fp->Base, errMsg)) { + return GL_FALSE; + } + + return GL_TRUE; + } + + + /** + * Called via glValidateProgram() + */ + static void + validate_program(GLcontext *ctx, GLuint program) + { + struct gl_shader_program *shProg; + char errMsg[100]; + + shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram"); + if (!shProg) { + return; + } + + shProg->Validated = validate_shader_program(ctx, shProg, errMsg); + if (!shProg->Validated) { + /* update info log */ + if (shProg->InfoLog) { - free(shProg->InfoLog); ++ talloc_free(shProg->InfoLog); + } - shProg->InfoLog = _mesa_strdup(errMsg); ++ shProg->InfoLog = talloc_strdup(shProg, errMsg); + } + } + + + + void GLAPIENTRY + _mesa_AttachObjectARB(GLhandleARB 
program, GLhandleARB shader) + { + GET_CURRENT_CONTEXT(ctx); + attach_shader(ctx, program, shader); + } + + + void GLAPIENTRY + _mesa_AttachShader(GLuint program, GLuint shader) + { + GET_CURRENT_CONTEXT(ctx); + attach_shader(ctx, program, shader); + } + + + void GLAPIENTRY + _mesa_BindAttribLocationARB(GLhandleARB program, GLuint index, + const GLcharARB *name) + { + GET_CURRENT_CONTEXT(ctx); + bind_attrib_location(ctx, program, index, name); + } + + + void GLAPIENTRY + _mesa_CompileShaderARB(GLhandleARB shaderObj) + { + GET_CURRENT_CONTEXT(ctx); + compile_shader(ctx, shaderObj); + } + + + GLuint GLAPIENTRY + _mesa_CreateShader(GLenum type) + { + GET_CURRENT_CONTEXT(ctx); + return create_shader(ctx, type); + } + + + GLhandleARB GLAPIENTRY + _mesa_CreateShaderObjectARB(GLenum type) + { + GET_CURRENT_CONTEXT(ctx); + return create_shader(ctx, type); + } + + + GLuint GLAPIENTRY + _mesa_CreateProgram(void) + { + GET_CURRENT_CONTEXT(ctx); + return create_shader_program(ctx); + } + + + GLhandleARB GLAPIENTRY + _mesa_CreateProgramObjectARB(void) + { + GET_CURRENT_CONTEXT(ctx); + return create_shader_program(ctx); + } + + + void GLAPIENTRY + _mesa_DeleteObjectARB(GLhandleARB obj) + { + if (obj) { + GET_CURRENT_CONTEXT(ctx); + if (is_program(ctx, obj)) { + delete_shader_program(ctx, obj); + } + else if (is_shader(ctx, obj)) { + delete_shader(ctx, obj); + } + else { + /* error? 
*/ + } + } + } + + + void GLAPIENTRY + _mesa_DeleteProgram(GLuint name) + { + if (name) { + GET_CURRENT_CONTEXT(ctx); + delete_shader_program(ctx, name); + } + } + + + void GLAPIENTRY + _mesa_DeleteShader(GLuint name) + { + if (name) { + GET_CURRENT_CONTEXT(ctx); + delete_shader(ctx, name); + } + } + + + void GLAPIENTRY + _mesa_DetachObjectARB(GLhandleARB program, GLhandleARB shader) + { + GET_CURRENT_CONTEXT(ctx); + detach_shader(ctx, program, shader); + } + + + void GLAPIENTRY + _mesa_DetachShader(GLuint program, GLuint shader) + { + GET_CURRENT_CONTEXT(ctx); + detach_shader(ctx, program, shader); + } + + + void GLAPIENTRY + _mesa_GetActiveAttribARB(GLhandleARB program, GLuint index, + GLsizei maxLength, GLsizei * length, GLint * size, + GLenum * type, GLcharARB * name) + { + GET_CURRENT_CONTEXT(ctx); + get_active_attrib(ctx, program, index, maxLength, length, size, type, name); + } + + + void GLAPIENTRY + _mesa_GetAttachedObjectsARB(GLhandleARB container, GLsizei maxCount, + GLsizei * count, GLhandleARB * obj) + { + GET_CURRENT_CONTEXT(ctx); + get_attached_shaders(ctx, container, maxCount, count, obj); + } + + + void GLAPIENTRY + _mesa_GetAttachedShaders(GLuint program, GLsizei maxCount, + GLsizei *count, GLuint *obj) + { + GET_CURRENT_CONTEXT(ctx); + get_attached_shaders(ctx, program, maxCount, count, obj); + } + + + GLint GLAPIENTRY + _mesa_GetAttribLocationARB(GLhandleARB program, const GLcharARB * name) + { + GET_CURRENT_CONTEXT(ctx); + return get_attrib_location(ctx, program, name); + } + + + void GLAPIENTRY + _mesa_GetInfoLogARB(GLhandleARB object, GLsizei maxLength, GLsizei * length, + GLcharARB * infoLog) + { + GET_CURRENT_CONTEXT(ctx); + if (is_program(ctx, object)) { + get_program_info_log(ctx, object, maxLength, length, infoLog); + } + else if (is_shader(ctx, object)) { + get_shader_info_log(ctx, object, maxLength, length, infoLog); + } + else { + _mesa_error(ctx, GL_INVALID_OPERATION, "glGetInfoLogARB"); + } + } + + + void GLAPIENTRY + 
_mesa_GetObjectParameterivARB(GLhandleARB object, GLenum pname, GLint *params) + { + GET_CURRENT_CONTEXT(ctx); + /* Implement in terms of GetProgramiv, GetShaderiv */ + if (is_program(ctx, object)) { + if (pname == GL_OBJECT_TYPE_ARB) { + *params = GL_PROGRAM_OBJECT_ARB; + } + else { + get_programiv(ctx, object, pname, params); + } + } + else if (is_shader(ctx, object)) { + if (pname == GL_OBJECT_TYPE_ARB) { + *params = GL_SHADER_OBJECT_ARB; + } + else { + get_shaderiv(ctx, object, pname, params); + } + } + else { + _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectParameterivARB"); + } + } + + + void GLAPIENTRY + _mesa_GetObjectParameterfvARB(GLhandleARB object, GLenum pname, + GLfloat *params) + { + GLint iparams[1]; /* XXX is one element enough? */ + _mesa_GetObjectParameterivARB(object, pname, iparams); + params[0] = (GLfloat) iparams[0]; + } + + + void GLAPIENTRY + _mesa_GetProgramiv(GLuint program, GLenum pname, GLint *params) + { + GET_CURRENT_CONTEXT(ctx); + get_programiv(ctx, program, pname, params); + } + + + void GLAPIENTRY + _mesa_GetShaderiv(GLuint shader, GLenum pname, GLint *params) + { + GET_CURRENT_CONTEXT(ctx); + get_shaderiv(ctx, shader, pname, params); + } + + + void GLAPIENTRY + _mesa_GetProgramInfoLog(GLuint program, GLsizei bufSize, + GLsizei *length, GLchar *infoLog) + { + GET_CURRENT_CONTEXT(ctx); + get_program_info_log(ctx, program, bufSize, length, infoLog); + } + + + void GLAPIENTRY + _mesa_GetShaderInfoLog(GLuint shader, GLsizei bufSize, + GLsizei *length, GLchar *infoLog) + { + GET_CURRENT_CONTEXT(ctx); + get_shader_info_log(ctx, shader, bufSize, length, infoLog); + } + + + void GLAPIENTRY + _mesa_GetShaderSourceARB(GLhandleARB shader, GLsizei maxLength, + GLsizei *length, GLcharARB *sourceOut) + { + GET_CURRENT_CONTEXT(ctx); + get_shader_source(ctx, shader, maxLength, length, sourceOut); + } + + + GLhandleARB GLAPIENTRY + _mesa_GetHandleARB(GLenum pname) + { + GET_CURRENT_CONTEXT(ctx); + return get_handle(ctx, pname); + } + + + GLboolean 
GLAPIENTRY + _mesa_IsProgram(GLuint name) + { + GET_CURRENT_CONTEXT(ctx); + return is_program(ctx, name); + } + + + GLboolean GLAPIENTRY + _mesa_IsShader(GLuint name) + { + GET_CURRENT_CONTEXT(ctx); + return is_shader(ctx, name); + } + + + void GLAPIENTRY + _mesa_LinkProgramARB(GLhandleARB programObj) + { + GET_CURRENT_CONTEXT(ctx); + link_program(ctx, programObj); + } + + + + /** + * Read shader source code from a file. + * Useful for debugging to override an app's shader. + * At most 50*1000 bytes of the file are read. + * \return copy of the data read, made with _mesa_strdup(), or NULL if the + * file cannot be opened or the read buffer cannot be allocated. + */ + static GLcharARB * + read_shader(const char *fname) + { + const int max = 50*1000; + FILE *f = fopen(fname, "r"); + GLcharARB *buffer, *shader; + int len; + + if (!f) { + return NULL; + } + + /* one extra byte so the terminating zero fits even when fread() + * returns the full 'max' bytes + */ + buffer = (char *) malloc(max + 1); + if (!buffer) { + fclose(f); + return NULL; + } + len = fread(buffer, 1, max, f); + buffer[len] = 0; + + fclose(f); + + shader = _mesa_strdup(buffer); + free(buffer); + + return shader; + } + + + /** + * Called via glShaderSource() and glShaderSourceARB() API functions. + * Basically, concatenate the source code strings into one long string + * and pass it to shader_source(). + */ + void GLAPIENTRY + _mesa_ShaderSourceARB(GLhandleARB shaderObj, GLsizei count, + const GLcharARB ** string, const GLint * length) + { + GET_CURRENT_CONTEXT(ctx); + GLint *offsets; + GLsizei i, totalLength; + GLcharARB *source; + GLuint checksum; + + if (!shaderObj || string == NULL) { + _mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB"); + return; + } + + /* + * This array holds offsets of where the appropriate string ends, thus the + * last element will be set to the total length of the source code.
+ */ + offsets = (GLint *) malloc(count * sizeof(GLint)); + if (offsets == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glShaderSourceARB"); + return; + } + + for (i = 0; i < count; i++) { + if (string[i] == NULL) { + free((GLvoid *) offsets); + _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderSourceARB(null string)"); + return; + } + if (length == NULL || length[i] < 0) + offsets[i] = strlen(string[i]); + else + offsets[i] = length[i]; + /* accumulate string lengths */ + if (i > 0) + offsets[i] += offsets[i - 1]; + } + + /* Total length of source string is sum of all strings plus two. + * One extra byte for terminating zero, another extra byte to silence + * valgrind warnings in the parser/grammar code. + * NOTE(review): when count is 0 the loop above never runs and + * offsets[count - 1] indexes before the start of the array -- confirm + * whether count <= 0 should be rejected before this point. + */ + totalLength = offsets[count - 1] + 2; + source = (GLcharARB *) malloc(totalLength * sizeof(GLcharARB)); + if (source == NULL) { + free((GLvoid *) offsets); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glShaderSourceARB"); + return; + } + + for (i = 0; i < count; i++) { + GLint start = (i > 0) ? offsets[i - 1] : 0; + memcpy(source + start, string[i], + (offsets[i] - start) * sizeof(GLcharARB)); + } + source[totalLength - 1] = '\0'; + source[totalLength - 2] = '\0'; + + if (SHADER_SUBST) { + /* Compute the shader's source code checksum then try to open a file + * named newshader_NNN, where NNN is that checksum printed in decimal. + * If it exists, use it in place of the + * original shader source code. For debugging.
+ */ + char filename[100]; + GLcharARB *newSource; + + checksum = _mesa_str_checksum(source); + + _mesa_snprintf(filename, sizeof(filename), "newshader_%d", checksum); + + newSource = read_shader(filename); + if (newSource) { + fprintf(stderr, "Mesa: Replacing shader %u chksum=%d with %s\n", + shaderObj, checksum, filename); + free(source); + source = newSource; + } + } + + shader_source(ctx, shaderObj, source); + + if (SHADER_SUBST) { + struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj); + if (sh) + sh->SourceChecksum = checksum; /* save original checksum */ + } + + free(offsets); + } + + + void GLAPIENTRY + _mesa_UseProgramObjectARB(GLhandleARB program) + { + GET_CURRENT_CONTEXT(ctx); + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _mesa_use_program(ctx, program); + } + + + void GLAPIENTRY + _mesa_ValidateProgramARB(GLhandleARB program) + { + GET_CURRENT_CONTEXT(ctx); + validate_program(ctx, program); + } + + #ifdef FEATURE_ES2 + + void GLAPIENTRY + _mesa_GetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype, + GLint* range, GLint* precision) + { + GET_CURRENT_CONTEXT(ctx); + _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + } + + + void GLAPIENTRY + _mesa_ReleaseShaderCompiler(void) + { + GET_CURRENT_CONTEXT(ctx); + _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + } + + + void GLAPIENTRY + _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat, + const void* binary, GLint length) + { + GET_CURRENT_CONTEXT(ctx); + _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__); + } + + #endif /* FEATURE_ES2 */ + + + #if FEATURE_ARB_geometry_shader4 + + void GLAPIENTRY + _mesa_ProgramParameteriARB(GLuint program, GLenum pname, + GLint value) + { + struct gl_shader_program *shProg; + GET_CURRENT_CONTEXT(ctx); + + ASSERT_OUTSIDE_BEGIN_END(ctx); + + shProg = _mesa_lookup_shader_program_err(ctx, program, + "glProgramParameteri"); + if (!shProg) + return; + + switch (pname) { + case GL_GEOMETRY_VERTICES_OUT_ARB: + if (value < 1 || + value > 
ctx->Const.GeometryProgram.MaxGeometryOutputVertices) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glProgramParameteri(GL_GEOMETRY_VERTICES_OUT_ARB=%d)", + value); + return; + } + shProg->Geom.VerticesOut = value; + break; + case GL_GEOMETRY_INPUT_TYPE_ARB: + /* only the primitive types listed here are accepted as input type */ + switch (value) { + case GL_POINTS: + case GL_LINES: + case GL_LINES_ADJACENCY_ARB: + case GL_TRIANGLES: + case GL_TRIANGLES_ADJACENCY_ARB: + shProg->Geom.InputType = value; + break; + default: + _mesa_error(ctx, GL_INVALID_VALUE, + "glProgramParameteri(geometry input type = %s)", + _mesa_lookup_enum_by_nr(value)); + return; + } + break; + case GL_GEOMETRY_OUTPUT_TYPE_ARB: + /* only the primitive types listed here are accepted as output type */ + switch (value) { + case GL_POINTS: + case GL_LINE_STRIP: + case GL_TRIANGLE_STRIP: + shProg->Geom.OutputType = value; + break; + default: + _mesa_error(ctx, GL_INVALID_VALUE, + "glProgramParameteri(geometry output type = %s)", + _mesa_lookup_enum_by_nr(value)); + return; + } + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteriARB(pname=%s)", + _mesa_lookup_enum_by_nr(pname)); + break; + } + } + + #endif + + + /** + * Plug in shader-related functions into API dispatch table.
+ */ + void + _mesa_init_shader_dispatch(struct _glapi_table *exec) + { + #if FEATURE_GL + /* GL_ARB_vertex/fragment_shader */ + SET_DeleteObjectARB(exec, _mesa_DeleteObjectARB); + SET_GetHandleARB(exec, _mesa_GetHandleARB); + SET_DetachObjectARB(exec, _mesa_DetachObjectARB); + SET_CreateShaderObjectARB(exec, _mesa_CreateShaderObjectARB); + SET_ShaderSourceARB(exec, _mesa_ShaderSourceARB); + SET_CompileShaderARB(exec, _mesa_CompileShaderARB); + SET_CreateProgramObjectARB(exec, _mesa_CreateProgramObjectARB); + SET_AttachObjectARB(exec, _mesa_AttachObjectARB); + SET_LinkProgramARB(exec, _mesa_LinkProgramARB); + SET_UseProgramObjectARB(exec, _mesa_UseProgramObjectARB); + SET_ValidateProgramARB(exec, _mesa_ValidateProgramARB); + SET_GetObjectParameterfvARB(exec, _mesa_GetObjectParameterfvARB); + SET_GetObjectParameterivARB(exec, _mesa_GetObjectParameterivARB); + SET_GetInfoLogARB(exec, _mesa_GetInfoLogARB); + SET_GetAttachedObjectsARB(exec, _mesa_GetAttachedObjectsARB); + SET_GetShaderSourceARB(exec, _mesa_GetShaderSourceARB); + + /* OpenGL 2.0 */ + SET_AttachShader(exec, _mesa_AttachShader); + SET_CreateProgram(exec, _mesa_CreateProgram); + SET_CreateShader(exec, _mesa_CreateShader); + SET_DeleteProgram(exec, _mesa_DeleteProgram); + SET_DeleteShader(exec, _mesa_DeleteShader); + SET_DetachShader(exec, _mesa_DetachShader); + SET_GetAttachedShaders(exec, _mesa_GetAttachedShaders); + SET_GetProgramiv(exec, _mesa_GetProgramiv); + SET_GetProgramInfoLog(exec, _mesa_GetProgramInfoLog); + SET_GetShaderiv(exec, _mesa_GetShaderiv); + SET_GetShaderInfoLog(exec, _mesa_GetShaderInfoLog); + SET_IsProgram(exec, _mesa_IsProgram); + SET_IsShader(exec, _mesa_IsShader); + + #if FEATURE_ARB_vertex_shader + SET_BindAttribLocationARB(exec, _mesa_BindAttribLocationARB); + SET_GetActiveAttribARB(exec, _mesa_GetActiveAttribARB); + SET_GetAttribLocationARB(exec, _mesa_GetAttribLocationARB); + #endif + + #if FEATURE_ARB_geometry_shader4 + SET_ProgramParameteriARB(exec, 
_mesa_ProgramParameteriARB); + #endif + #endif /* FEATURE_GL */ + } + diff --cc src/mesa/main/shaderobj.c index 00000000000,14bbb2e4bc3..129d9742247 mode 000000,100644..100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@@ -1,0 -1,392 +1,390 @@@ + /* + * Mesa 3-D graphics library + * + * Copyright (C) 2004-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009-2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + /** + * \file shaderobj.c + * \author Brian Paul + * + */ + + + #include "main/glheader.h" + #include "main/context.h" + #include "main/hash.h" + #include "main/shaderobj.h" + #include "program/program.h" + #include "program/prog_parameter.h" + #include "program/prog_uniform.h" - ++#include "talloc.h" + + /**********************************************************************/ + /*** Shader object functions ***/ + /**********************************************************************/ + + + /** + * Set ptr to point to sh. + * If ptr is pointing to another shader, decrement its refcount (and delete + * if refcount hits zero). + * Then set ptr to point to sh, incrementing its refcount. + */ + void + _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr, + struct gl_shader *sh) + { + assert(ptr); + if (*ptr == sh) { + /* no-op */ + return; + } + if (*ptr) { + /* Unreference the old shader */ + GLboolean deleteFlag = GL_FALSE; + struct gl_shader *old = *ptr; + + ASSERT(old->RefCount > 0); + old->RefCount--; + /*printf("SHADER DECR %p (%d) to %d\n", + (void*) old, old->Name, old->RefCount);*/ + deleteFlag = (old->RefCount == 0); + + if (deleteFlag) { + _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); + ctx->Driver.DeleteShader(ctx, old); + } + + *ptr = NULL; + } + assert(!*ptr); + + if (sh) { + /* reference new */ + sh->RefCount++; + /*printf("SHADER INCR %p (%d) to %d\n", + (void*) sh, sh->Name, sh->RefCount);*/ + *ptr = sh; + } + } + + + /** + * Allocate a new gl_shader object, initialize it. 
+ * Called via ctx->Driver.NewShader() + */ -static struct gl_shader * ++struct gl_shader * + _mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type) + { + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); - shader = CALLOC_STRUCT(gl_shader); ++ shader = talloc_zero(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + shader->RefCount = 1; + } + return shader; + } + + + /** + * Delete a shader object. + * Called via ctx->Driver.DeleteShader(). + */ + static void + __mesa_delete_shader(GLcontext *ctx, struct gl_shader *sh) + { + if (sh->Source) + free((void *) sh->Source); - if (sh->InfoLog) - free(sh->InfoLog); + _mesa_reference_program(ctx, &sh->Program, NULL); - free(sh); ++ talloc_free(sh); + } + + + /** + * Lookup a GLSL shader object. + */ + struct gl_shader * + _mesa_lookup_shader(GLcontext *ctx, GLuint name) + { + if (name) { + struct gl_shader *sh = (struct gl_shader *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + /* Note that both gl_shader and gl_shader_program objects are kept + * in the same hash table. Check the object's type to be sure it's + * what we're expecting. + */ + if (sh && sh->Type == GL_SHADER_PROGRAM_MESA) { + return NULL; + } + return sh; + } + return NULL; + } + + + /** + * As above, but record an error if shader is not found. 
+ */ + struct gl_shader * + _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller) + { + if (!name) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + else { + struct gl_shader *sh = (struct gl_shader *) + _mesa_HashLookup(ctx->Shared->ShaderObjects, name); + if (!sh) { + _mesa_error(ctx, GL_INVALID_VALUE, caller); + return NULL; + } + if (sh->Type == GL_SHADER_PROGRAM_MESA) { + _mesa_error(ctx, GL_INVALID_OPERATION, caller); + return NULL; + } + return sh; + } + } + + + + /**********************************************************************/ + /*** Shader Program object functions ***/ + /**********************************************************************/ + + + /** + * Set ptr to point to shProg. + * If ptr is pointing to another object, decrement its refcount (and delete + * if refcount hits zero). + * Then set ptr to point to shProg, incrementing its refcount. + */ + void + _mesa_reference_shader_program(GLcontext *ctx, + struct gl_shader_program **ptr, + struct gl_shader_program *shProg) + { + assert(ptr); + if (*ptr == shProg) { + /* no-op */ + return; + } + if (*ptr) { + /* Unreference the old shader program */ + GLboolean deleteFlag = GL_FALSE; + struct gl_shader_program *old = *ptr; + + ASSERT(old->RefCount > 0); + old->RefCount--; + #if 0 + printf("ShaderProgram %p ID=%u RefCount-- to %d\n", + (void *) old, old->Name, old->RefCount); + #endif + deleteFlag = (old->RefCount == 0); + + if (deleteFlag) { + _mesa_HashRemove(ctx->Shared->ShaderObjects, old->Name); + ctx->Driver.DeleteShaderProgram(ctx, old); + } + + *ptr = NULL; + } + assert(!*ptr); + + if (shProg) { + shProg->RefCount++; + #if 0 + printf("ShaderProgram %p ID=%u RefCount++ to %d\n", + (void *) shProg, shProg->Name, shProg->RefCount); + #endif + *ptr = shProg; + } + } + + + /** + * Allocate a new gl_shader_program object, initialize it. 
+  * Called via ctx->Driver.NewShaderProgram()
+  */
+ static struct gl_shader_program *
+ _mesa_new_shader_program(GLcontext *ctx, GLuint name)
+ {
+    struct gl_shader_program *shProg;
 -   shProg = CALLOC_STRUCT(gl_shader_program);
++   shProg = talloc_zero(NULL, struct gl_shader_program);
+    if (shProg) {
+       shProg->Type = GL_SHADER_PROGRAM_MESA;
+       shProg->Name = name;
+       shProg->RefCount = 1;
+       shProg->Attributes = _mesa_new_parameter_list();
+ #if FEATURE_ARB_geometry_shader4
+       shProg->Geom.VerticesOut = 0;
+       shProg->Geom.InputType = GL_TRIANGLES;
+       shProg->Geom.OutputType = GL_TRIANGLE_STRIP;
+ #endif
+    }
+    return shProg;
+ }
+
+
+ /**
+  * Clear (free) the shader program state that gets produced by linking.
+  */
+ void
+ _mesa_clear_shader_program_data(GLcontext *ctx,
+                                 struct gl_shader_program *shProg)
+ {
+    _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL);
+    _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL);
+    _mesa_reference_geomprog(ctx, &shProg->GeometryProgram, NULL);
+
+    if (shProg->Uniforms) {
+       _mesa_free_uniform_list(shProg->Uniforms);
+       shProg->Uniforms = NULL;
+    }
+
+    if (shProg->Varying) {
+       _mesa_free_parameter_list(shProg->Varying);
+       shProg->Varying = NULL;
+    }
+ }
+
+
+ /**
+  * Free all the data that hangs off a shader program object, but not the
+  * object itself.
+  */
+ void
+ _mesa_free_shader_program_data(GLcontext *ctx,
+                                struct gl_shader_program *shProg)
+ {
+    GLuint i;
+
+    assert(shProg->Type == GL_SHADER_PROGRAM_MESA);
+
+    _mesa_clear_shader_program_data(ctx, shProg);
+
+    if (shProg->Attributes) {
+       _mesa_free_parameter_list(shProg->Attributes);
+       shProg->Attributes = NULL;
+    }
+
+    /* detach shaders */
+    for (i = 0; i < shProg->NumShaders; i++) {
+       _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL);
+    }
+    shProg->NumShaders = 0;
+
+    if (shProg->Shaders) {
+       free(shProg->Shaders);
+       shProg->Shaders = NULL;
+    }
+
+    if (shProg->InfoLog) {
 -      free(shProg->InfoLog);
++      talloc_free(shProg->InfoLog);
+       shProg->InfoLog = NULL;
+    }
+
+    /* Transform feedback varying vars */
+    for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
+       free(shProg->TransformFeedback.VaryingNames[i]);
+    }
+    free(shProg->TransformFeedback.VaryingNames);
+    shProg->TransformFeedback.VaryingNames = NULL;
+    shProg->TransformFeedback.NumVarying = 0;
+ }
+
+
+ /**
+  * Free/delete a shader program object.
+  * Called via ctx->Driver.DeleteShaderProgram().
+  */
+ static void
+ __mesa_delete_shader_program(GLcontext *ctx, struct gl_shader_program *shProg)
+ {
+    _mesa_free_shader_program_data(ctx, shProg);
+
 -   free(shProg);
++   talloc_free(shProg);
+ }
+
+
+ /**
+  * Lookup a GLSL program object.
+  */
+ struct gl_shader_program *
+ _mesa_lookup_shader_program(GLcontext *ctx, GLuint name)
+ {
+    struct gl_shader_program *shProg;
+    if (name) {
+       shProg = (struct gl_shader_program *)
+          _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+       /* Note that both gl_shader and gl_shader_program objects are kept
+        * in the same hash table.  Check the object's type to be sure it's
+        * what we're expecting.
+        */
+       if (shProg && shProg->Type != GL_SHADER_PROGRAM_MESA) {
+          return NULL;
+       }
+       return shProg;
+    }
+    return NULL;
+ }
+
+
+ /**
+  * As above, but record an error if program is not found.
++ *
++ * Records GL_INVALID_VALUE when \c name is zero or not in the table, and
++ * GL_INVALID_OPERATION when the object found is not a shader program.
++ * \c caller is used as the error text; it is passed as a "%s" argument so
++ * that a '%' in it is never interpreted as a format directive.
+  */
+ struct gl_shader_program *
+ _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name,
+                                 const char *caller)
+ {
+    if (!name) {
 -      _mesa_error(ctx, GL_INVALID_VALUE, caller);
++      _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+       return NULL;
+    }
+    else {
+       struct gl_shader_program *shProg = (struct gl_shader_program *)
+          _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+       if (!shProg) {
 -         _mesa_error(ctx, GL_INVALID_VALUE, caller);
++         _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+          return NULL;
+       }
+       if (shProg->Type != GL_SHADER_PROGRAM_MESA) {
 -         _mesa_error(ctx, GL_INVALID_OPERATION, caller);
++         _mesa_error(ctx, GL_INVALID_OPERATION, "%s", caller);
+          return NULL;
+       }
+       return shProg;
+    }
+ }
+
+
++/**
++ * Plug the shader and shader-program constructors/destructors defined in
++ * this file into the driver function table.
++ */
+ void
+ _mesa_init_shader_object_functions(struct dd_function_table *driver)
+ {
+    driver->NewShader = _mesa_new_shader;
+    driver->DeleteShader = __mesa_delete_shader;
+    driver->NewShaderProgram = _mesa_new_shader_program;
+    driver->DeleteShaderProgram = __mesa_delete_shader_program;
+ }
diff --cc src/mesa/main/shaderobj.h
index 00000000000,d6b37b45963..b48244dc0d2
mode 000000,100644..100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@@ -1,0 -1,78 +1,98 @@@
+ /*
+  * Mesa 3-D graphics library
+  * Version: 6.5.3
+  *
+  * Copyright (C) 2004-2007 Brian Paul All Rights Reserved.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included
+  * in all copies or substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  */
+
+
+ #ifndef SHADEROBJ_H
+ #define SHADEROBJ_H
+
+
 -#include "glheader.h"
 -#include "mtypes.h"
++#include "main/glheader.h"
++#include "main/mtypes.h"
++#include "program/ir_to_mesa.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++/**
++ * Internal functions
++ *
++ * _mesa_init_shader_state() and _mesa_free_shader_state() are declared once,
++ * at the end of this list.
++ */
+
+
+ extern void
+ _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr,
+                        struct gl_shader *sh);
+
+ extern struct gl_shader *
+ _mesa_lookup_shader(GLcontext *ctx, GLuint name);
+
+ extern struct gl_shader *
+ _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller);
+
+
+
+ extern void
+ _mesa_reference_shader_program(GLcontext *ctx,
+                                struct gl_shader_program **ptr,
+                                struct gl_shader_program *shProg);
+
++extern struct gl_shader *
++_mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type);
++
+ extern struct gl_shader_program *
+ _mesa_lookup_shader_program(GLcontext *ctx, GLuint name);
+
+ extern struct gl_shader_program *
+ _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name,
+                                 const char *caller);
+
+ extern void
+ _mesa_clear_shader_program_data(GLcontext *ctx,
+                                 struct gl_shader_program *shProg);
+
+ extern void
+ _mesa_free_shader_program_data(GLcontext *ctx,
+                                struct gl_shader_program *shProg);
+
+
+
+ extern void
+ _mesa_init_shader_object_functions(struct dd_function_table *driver);
+
+ extern void
+ _mesa_init_shader_state(GLcontext *ctx);
+
+ extern void
+ _mesa_free_shader_state(GLcontext *ctx);
+
++#ifdef __cplusplus
++} /* extern "C" -- no trailing ';', which is an empty declaration */
++#endif
+
+ #endif /* SHADEROBJ_H */
diff --cc src/mesa/program/hash_table.c
index 00000000000,fa6ba2bfdfc..f7ef366c1a0
mode 000000,100644..100644
---
a/src/mesa/program/hash_table.c +++ b/src/mesa/program/hash_table.c @@@ -1,0 -1,159 +1,190 @@@ + /* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + /** + * \file hash_table.c + * \brief Implementation of a generic, opaque hash table data type. 
+  *
+  * \author Ian Romanick
+  */
+
+ #include "main/imports.h"
+ #include "main/simple_list.h"
+ #include "hash_table.h"
+
+ struct node {
+    struct node *next;
+    struct node *prev;
+ };
+
+ struct hash_table {
+    hash_func_t    hash;
+    hash_compare_func_t  compare;
+
+    unsigned num_buckets;
+    struct node buckets[1];
+ };
+
+
+ struct hash_node {
+    struct node link;
+    const void *key;
+    void *data;
+ };
+
+
+ struct hash_table *
+ hash_table_ctor(unsigned num_buckets, hash_func_t hash,
+                 hash_compare_func_t compare)
+ {
+    struct hash_table *ht;
+    unsigned i;
+
+
+    if (num_buckets < 16) {
+       num_buckets = 16;
+    }
+
+    ht = malloc(sizeof(*ht) + ((num_buckets - 1)
+                               * sizeof(ht->buckets[0])));
+    if (ht != NULL) {
+       ht->hash = hash;
+       ht->compare = compare;
+       ht->num_buckets = num_buckets;
+
+       for (i = 0; i < num_buckets; i++) {
+          make_empty_list(& ht->buckets[i]);
+       }
+    }
+
+    return ht;
+ }
+
+
+ void
+ hash_table_dtor(struct hash_table *ht)
+ {
+    hash_table_clear(ht);
+    free(ht);
+ }
+
+
+ void
+ hash_table_clear(struct hash_table *ht)
+ {
+    struct node *node;
+    struct node *temp;
+    unsigned i;
+
+
+    for (i = 0; i < ht->num_buckets; i++) {
+       foreach_s(node, temp, & ht->buckets[i]) {
+          remove_from_list(node);
+          free(node);
+       }
+
+       assert(is_empty_list(& ht->buckets[i]));
+    }
+ }
+
+
+ void *
+ hash_table_find(struct hash_table *ht, const void *key)
+ {
+    const unsigned hash_value = (*ht->hash)(key);
+    const unsigned bucket = hash_value % ht->num_buckets;
+    struct node *node;
+
+    foreach(node, & ht->buckets[bucket]) {
+       struct hash_node *hn = (struct hash_node *) node;
+
+       if ((*ht->compare)(hn->key, key) == 0) {
+          return hn->data;
+       }
+    }
+
+    return NULL;
+ }
+
+
+ void
+ hash_table_insert(struct hash_table *ht, void *data, const void *key)
+ {
+    const unsigned hash_value = (*ht->hash)(key);
+    const unsigned bucket = hash_value % ht->num_buckets;
+    struct hash_node *node;
+
+    node = calloc(1, sizeof(*node));
++   if (node == NULL) {
++      /* Out of memory: leave the table unchanged instead of writing
++       * through a NULL pointer.  The function returns void, so the
++       * failure cannot be reported to the caller from here.
++       */
++      return;
++   }
+
+    node->data = data;
+    node->key = key;
+
+    insert_at_head(& ht->buckets[bucket], & node->link);
+ }
+
++/* Unlink and free the first node in the key's bucket whose key compares
++ * equal.  Only the node is freed, not the key or the data it refers to.
++ * Does nothing when no node matches.
++ */
++void
++hash_table_remove(struct hash_table *ht, const void *key)
++{
++   const unsigned hash_value = (*ht->hash)(key);
++   const unsigned bucket = hash_value % ht->num_buckets;
++   struct node *node;
++
++   foreach(node, & ht->buckets[bucket]) {
++      struct hash_node *hn = (struct hash_node *) node;
++
++      if ((*ht->compare)(hn->key, key) == 0) {
++         remove_from_list(node);
++         free(node);
++         /* Return at once: the iterator's current node was just freed. */
++         return;
++      }
++   }
++}
+
+ unsigned
+ hash_table_string_hash(const void *key)
+ {
+    const char *str = (const char *) key;
+    unsigned hash = 5381;
+
+
+    while (*str != '\0') {
+       hash = (hash * 33) + *str;
+       str++;
+    }
+
+    return hash;
+ }
++
++
++unsigned
++hash_table_pointer_hash(const void *key)
++{
++   return (unsigned)((uintptr_t) key / sizeof(void *));
++}
++
++
++int
++hash_table_pointer_compare(const void *key1, const void *key2)
++{
++   return key1 == key2 ? 0 : 1;
++}
diff --cc src/mesa/program/hash_table.h
index 00000000000,7b302f5dbee..228ab948ff4
mode 000000,100644..100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@@ -1,0 -1,117 +1,152 @@@
+ /*
+  * Copyright © 2008 Intel Corporation
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+  * to deal in the Software without restriction, including without limitation
+  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the
+  * Software is furnished to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice (including the next
+  * paragraph) shall be included in all copies or substantial portions of the
+  * Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+  * DEALINGS IN THE SOFTWARE.
+  */
+
+ /**
+  * \file hash_table.h
+  * \brief Implementation of a generic, opaque hash table data type.
+  *
+  * \author Ian Romanick
+  */
+
+ #ifndef HASH_TABLE_H
+ #define HASH_TABLE_H
+
+ #include <string.h>
+
+ struct hash_table;
+
+ typedef unsigned (*hash_func_t)(const void *key);
+ typedef int (*hash_compare_func_t)(const void *key1, const void *key2);
+
++#ifdef __cplusplus
++extern "C" {
++#endif
++
+ /**
+  * Hash table constructor
+  *
+  * Creates a hash table with the specified number of buckets. The supplied
+  * \c hash and \c compare routines are used when adding elements to the table
+  * and when searching for elements in the table.
+  *
+  * \param num_buckets Number of buckets (bins) in the hash table.
+  * \param hash        Function used to compute hash value of input keys.
+  * \param compare     Function used to compare keys.
+  */
+ extern struct hash_table *hash_table_ctor(unsigned num_buckets,
+     hash_func_t hash, hash_compare_func_t compare);
+
+
+ /**
+  * Release all memory associated with a hash table
+  *
+  * \warning
+  * This function cannot release memory occupied either by keys or data.
+  */
+ extern void hash_table_dtor(struct hash_table *ht);
+
+
+ /**
+  * Flush all entries from a hash table
+  *
+  * \param ht Table to be cleared of its entries.
+  */
+ extern void hash_table_clear(struct hash_table *ht);
+
+
+ /**
+  * Search a hash table for a specific element
+  *
+  * \param ht  Table to be searched
+  * \param key Key of the desired element
+  *
+  * \return
+  * The \c data value supplied to \c hash_table_insert when the element with
+  * the matching key was added. If no matching key exists in the table,
+  * \c NULL is returned.
+  */
+ extern void *hash_table_find(struct hash_table *ht, const void *key);
+
+
+ /**
+  * Add an element to a hash table
+  */
+ extern void hash_table_insert(struct hash_table *ht, void *data,
+     const void *key);
+
++/**
++ * Remove a specific element from a hash table.
++ */
++extern void hash_table_remove(struct hash_table *ht, const void *key);
+
+ /**
+  * Compute hash value of a string
+  *
+  * Computes the hash value of a string using the DJB2 algorithm developed by
+  * Professor Daniel J. Bernstein. It was published on comp.lang.c once upon
+  * a time. I was unable to find the original posting in the archives.
+  *
+  * \param key Pointer to a NUL terminated string to be hashed.
+  *
+  * \sa hash_table_string_compare
+  */
+ extern unsigned hash_table_string_hash(const void *key);
+
+
+ /**
+  * Compare two strings used as keys
+  *
+  * This is just a macro wrapper around \c strcmp.
+  *
+  * \sa hash_table_string_hash
+  */
+ #define hash_table_string_compare ((hash_compare_func_t) strcmp)
+
++
++/**
++ * Compute hash value of a pointer
++ *
++ * \param key Pointer to be used as a hash key
++ *
++ * \note
++ * The memory pointed to by \c key is \b never accessed. The value of \c key
++ * itself is used as the hash key
++ *
++ * \sa hash_table_pointer_compare
++ */
++unsigned
++hash_table_pointer_hash(const void *key);
++
++
++/**
++ * Compare two pointers used as keys
++ *
++ * \sa hash_table_pointer_hash
++ */
++int
++hash_table_pointer_compare(const void *key1, const void *key2);
++
++#ifdef __cplusplus
++} /* extern "C" -- no trailing ';', which is an empty declaration */
++#endif
+ #endif /* HASH_TABLE_H */
diff --cc src/mesa/program/ir_to_mesa.cpp
index 00000000000,00000000000..1903b8fcf8f
new file mode 100644
--- /dev/null
+++ b/src/mesa/program/ir_to_mesa.cpp
@@@ -1,0 -1,0 +1,2309 @@@
++/*
++ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
++ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
++ * Copyright © 2010 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ */
++
++/**
++ * \file ir_to_mesa.cpp
++ *
++ * Translates the IR to ARB_fragment_program text if possible,
++ * printing the result
++ */
++
++#include <stdio.h>
++#include "ir.h"
++#include "ir_visitor.h"
++#include "ir_print_visitor.h"
++#include "ir_expression_flattening.h"
++#include "glsl_types.h"
++#include "glsl_parser_extras.h"
++#include "../glsl/program.h"
++#include "ir_optimization.h"
++#include "ast.h"
++
++extern "C" {
++#include "main/mtypes.h"
++#include "main/shaderobj.h"
++#include "main/uniforms.h"
++#include "program/prog_instruction.h"
++#include "program/prog_optimize.h"
++#include "program/prog_print.h"
++#include "program/program.h"
++#include "program/prog_uniform.h"
++#include "program/prog_parameter.h"
++}
++
++/**
++ * This struct is a corresponding struct to Mesa prog_src_register, with
++ * wider fields.
++ */
++typedef struct ir_to_mesa_src_reg {
++   int file; /**< PROGRAM_* from Mesa */
++   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
++   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
++   int negate; /**< NEGATE_XYZW mask from mesa */
++   /** Register index should be offset by the integer in this reg. */
++   ir_to_mesa_src_reg *reladdr;
++} ir_to_mesa_src_reg;
++
++typedef struct ir_to_mesa_dst_reg {
++   int file; /**< PROGRAM_* from Mesa */
++   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
++   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
++   GLuint cond_mask:4;
++   /** Register index should be offset by the integer in this reg. */
++   ir_to_mesa_src_reg *reladdr;
++} ir_to_mesa_dst_reg;
++
++extern ir_to_mesa_src_reg ir_to_mesa_undef;
++
++class ir_to_mesa_instruction : public exec_node {
++public:
++   enum prog_opcode op;
++   ir_to_mesa_dst_reg dst_reg;
++   ir_to_mesa_src_reg src_reg[3];
++   /** Pointer to the ir source this tree came from for debugging */
++   ir_instruction *ir;
++   GLboolean cond_update;
++   int sampler; /**< sampler index */
++   int tex_target; /**< One of TEXTURE_*_INDEX */
++   GLboolean tex_shadow;
++
++   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
++};
++
++class variable_storage : public exec_node {
++public:
++   variable_storage(ir_variable *var, int file, int index)
++      : file(file), index(index), var(var)
++   {
++      /* empty */
++   }
++
++   int file;
++   int index;
++   ir_variable *var; /* variable that maps to this, if any */
++};
++
++class function_entry : public exec_node {
++public:
++   ir_function_signature *sig;
++
++   /**
++    * identifier of this function signature used by the program.
++    *
++    * At the point that Mesa instructions for function calls are
++    * generated, we don't know the address of the first instruction of
++    * the function body. So we make the BranchTarget that is called a
++    * small integer and rewrite them during set_branchtargets().
++    */
++   int sig_id;
++
++   /**
++    * Pointer to first instruction of the function body.
++    *
++    * Set during function body emits after main() is processed.
++    */
++   ir_to_mesa_instruction *bgn_inst;
++
++   /**
++    * Index of the first instruction of the function body in actual
++    * Mesa IR.
++    *
++    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
++    */
++   int inst;
++
++   /** Storage for the return value. */
++   ir_to_mesa_src_reg return_reg;
++};
++
++class ir_to_mesa_visitor : public ir_visitor {
++public:
++   ir_to_mesa_visitor();
++
++   function_entry *current_function;
++
++   GLcontext *ctx;
++   struct gl_program *prog;
++
++   int next_temp;
++
++   variable_storage *find_variable_storage(ir_variable *var);
++
++   function_entry *get_function_signature(ir_function_signature *sig);
++
++   ir_to_mesa_src_reg get_temp(const glsl_type *type);
++   void reladdr_to_temp(ir_instruction *ir,
++                        ir_to_mesa_src_reg *reg, int *num_reladdr);
++
++   struct ir_to_mesa_src_reg src_reg_for_float(float val);
++
++   /**
++    * \name Visit methods
++    *
++    * As typical for the visitor pattern, there must be one \c visit method for
++    * each concrete subclass of \c ir_instruction. Virtual base classes within
++    * the hierarchy should not have \c visit methods.
++    */
++   /*@{*/
++   virtual void visit(ir_variable *);
++   virtual void visit(ir_loop *);
++   virtual void visit(ir_loop_jump *);
++   virtual void visit(ir_function_signature *);
++   virtual void visit(ir_function *);
++   virtual void visit(ir_expression *);
++   virtual void visit(ir_swizzle *);
++   virtual void visit(ir_dereference_variable *);
++   virtual void visit(ir_dereference_array *);
++   virtual void visit(ir_dereference_record *);
++   virtual void visit(ir_assignment *);
++   virtual void visit(ir_constant *);
++   virtual void visit(ir_call *);
++   virtual void visit(ir_return *);
++   virtual void visit(ir_discard *);
++   virtual void visit(ir_texture *);
++   virtual void visit(ir_if *);
++   /*@}*/
++
++   struct ir_to_mesa_src_reg result;
++
++   /** List of variable_storage */
++   exec_list variables;
++
++   /** List of function_entry */
++   exec_list function_signatures;
++   int next_signature_id;
++
++   /** List of ir_to_mesa_instruction */
++   exec_list instructions;
++
++   ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
++                                               enum prog_opcode op);
++
++   ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg src0);
++
++   ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg src0,
++                                               ir_to_mesa_src_reg src1);
++
++   ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg src0,
++                                               ir_to_mesa_src_reg src1,
++                                               ir_to_mesa_src_reg src2);
++
++   void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
++                                   enum prog_opcode op,
++                                   ir_to_mesa_dst_reg dst,
++                                   ir_to_mesa_src_reg src0);
++
++   void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
++                                   enum prog_opcode op,
++                                   ir_to_mesa_dst_reg dst,
++                                   ir_to_mesa_src_reg src0,
++                                   ir_to_mesa_src_reg src1);
++
++   GLboolean try_emit_mad(ir_expression *ir,
++                          int mul_operand);
++
++   int *sampler_map;
++   int sampler_map_size;
++
++   void map_sampler(int location, int sampler);
++   int get_sampler_number(int location);
++
++   void *mem_ctx;
++};
++
++ir_to_mesa_src_reg ir_to_mesa_undef = {
++   PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, NULL,
++};
++
++/* NOTE(review): the third member of ir_to_mesa_dst_reg is \c writemask, yet
++ * it is initialized with a swizzle constant here.  Left as is because the
++ * consumers of this value are not visible from this file section -- confirm
++ * whether a WRITEMASK_* value was intended.
++ */
++ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
++   PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
++};
++
++ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
++   PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
++};
++
++/**
++ * Return the swizzle that reads a vector of \c size components (1..4),
++ * repeating the last valid component in the unused trailing channels.
++ */
++static int swizzle_for_size(int size)
++{
++   static const int size_swizzles[4] = {
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
++   };
++
++   /* Anything outside 1..4 would index past the table. */
++   assert(size >= 1 && size <= 4);
++
++   return size_swizzles[size - 1];
++}
++
++/**
++ * Append one instruction with up to three sources to the instruction list.
++ *
++ * All other emit helpers funnel into this one, passing ir_to_mesa_undef
++ * for the sources they do not use.
++ */
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
++                                        enum prog_opcode op,
++                                        ir_to_mesa_dst_reg dst,
++                                        ir_to_mesa_src_reg src0,
++                                        ir_to_mesa_src_reg src1,
++                                        ir_to_mesa_src_reg src2)
++{
++   ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
++   int num_reladdr = 0;
++
++   /* If we have to do relative addressing, we want to load the ARL
++    * reg directly for one of the regs, and preload the other reladdr
++    * sources into temps.
++    */
++   num_reladdr += dst.reladdr != NULL;
++   num_reladdr += src0.reladdr != NULL;
++   num_reladdr += src1.reladdr != NULL;
++   num_reladdr += src2.reladdr != NULL;
++
++   reladdr_to_temp(ir, &src2, &num_reladdr);
++   reladdr_to_temp(ir, &src1, &num_reladdr);
++   reladdr_to_temp(ir, &src0, &num_reladdr);
++
++   if (dst.reladdr) {
++      ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
++                          *dst.reladdr);
++
++      num_reladdr--;
++   }
++   assert(num_reladdr == 0);
++
++   inst->op = op;
++   inst->dst_reg = dst;
++   inst->src_reg[0] = src0;
++   inst->src_reg[1] = src1;
++   inst->src_reg[2] = src2;
++   inst->ir = ir;
++
++   inst->function = NULL;
++
++   this->instructions.push_tail(inst);
++
++   return inst;
++}
++
++
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
++                                        enum prog_opcode op,
++                                        ir_to_mesa_dst_reg dst,
++                                        ir_to_mesa_src_reg src0,
++                                        ir_to_mesa_src_reg src1)
++{
++   return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
++}
++
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
++                                        enum prog_opcode op,
++                                        ir_to_mesa_dst_reg dst,
++                                        ir_to_mesa_src_reg src0)
++{
++   return ir_to_mesa_emit_op3(ir, op, dst,
++                              src0, ir_to_mesa_undef, ir_to_mesa_undef);
++}
++
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
++                                        enum prog_opcode op)
++{
++   return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
++                              ir_to_mesa_undef,
++                              ir_to_mesa_undef,
++                              ir_to_mesa_undef);
++}
++
++/**
++ * Record that uniform \c location maps to sampler index \c sampler,
++ * growing the map when \c location is beyond its current size.
++ *
++ * NOTE(review): when the map grows, only the slot at \c location is written
++ * here; the slots between the old size and \c location are not initialized
++ * by this function.
++ */
++void
++ir_to_mesa_visitor::map_sampler(int location, int sampler)
++{
++   if (this->sampler_map_size <= location) {
++      this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
++                                         int, location + 1);
++      this->sampler_map_size = location + 1;
++   }
++
++   this->sampler_map[location] = sampler;
++}
++
++int
++ir_to_mesa_visitor::get_sampler_number(int location)
++{
++   assert(location < this->sampler_map_size);
++   return this->sampler_map[location];
++}
++
++/* Build a destination register that writes all four channels of \c reg. */
++inline ir_to_mesa_dst_reg
++ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
++{
++   ir_to_mesa_dst_reg dst_reg;
++
++   dst_reg.file = reg.file;
++   dst_reg.index = reg.index;
++   dst_reg.writemask = WRITEMASK_XYZW;
++   dst_reg.cond_mask = COND_TR;
++   dst_reg.reladdr = reg.reladdr;
++
++   return dst_reg;
++}
++
++/* Build a source register that reads \c reg with an XYZW swizzle, unnegated. */
++inline ir_to_mesa_src_reg
++ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
++{
++   ir_to_mesa_src_reg src_reg;
++
++   src_reg.file = reg.file;
++   src_reg.index = reg.index;
++   src_reg.swizzle = SWIZZLE_XYZW;
++   src_reg.negate = 0;
++   src_reg.reladdr = reg.reladdr;
++
++   return src_reg;
++}
++
++/**
++ * Emits Mesa scalar opcodes to produce unique answers across channels.
++ *
++ * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
++ * channel determines the result across all channels. So to do a vec4
++ * of this operation, we want to emit a scalar per source channel used
++ * to produce dest channels.
++ */
++void
++ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg orig_src0,
++                                               ir_to_mesa_src_reg orig_src1)
++{
++   int i, j;
++   int done_mask = ~dst.writemask;
++
++   /* Mesa RCP is a scalar operation splatting results to all channels,
++    * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
++    * dst channels.
++    */
++   for (i = 0; i < 4; i++) {
++      GLuint this_mask = (1 << i);
++      ir_to_mesa_instruction *inst;
++      ir_to_mesa_src_reg src0 = orig_src0;
++      ir_to_mesa_src_reg src1 = orig_src1;
++
++      if (done_mask & this_mask)
++         continue;
++
++      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
++      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
++      /* Fold in later channels that read the same source components, so
++       * one instruction covers all of them.
++       */
++      for (j = i + 1; j < 4; j++) {
++         if (!(done_mask & (1 << j)) &&
++             GET_SWZ(src0.swizzle, j) == src0_swiz &&
++             GET_SWZ(src1.swizzle, j) == src1_swiz) {
++            this_mask |= (1 << j);
++         }
++      }
++      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
++                                   src0_swiz, src0_swiz);
++      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
++                                   src1_swiz, src1_swiz);
++
++      inst = ir_to_mesa_emit_op2(ir, op,
++                                 dst,
++                                 src0,
++                                 src1);
++      inst->dst_reg.writemask = this_mask;
++      done_mask |= this_mask;
++   }
++}
++
++void
++ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg src0)
++{
++   ir_to_mesa_src_reg undef = ir_to_mesa_undef;
++
++   undef.swizzle = SWIZZLE_XXXX;
++
++   ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
++}
++
++struct ir_to_mesa_src_reg
++ir_to_mesa_visitor::src_reg_for_float(float val)
++{
++   ir_to_mesa_src_reg src_reg;
++
++   src_reg.file = PROGRAM_CONSTANT;
++   src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
++                                              &val, 1, &src_reg.swizzle);
++   src_reg.reladdr = NULL;
++   src_reg.negate = 0;
++
++   return src_reg;
++}
++
++/**
++ * Number of registers needed to hold a value of \c type: one per scalar or
++ * vector, one per matrix column, and the sum over the elements of arrays
++ * and structures.
++ */
++static int
++type_size(const struct glsl_type *type)
++{
++   unsigned int i;
++   int size;
++
++   switch (type->base_type) {
++   case GLSL_TYPE_UINT:
++   case GLSL_TYPE_INT:
++   case GLSL_TYPE_FLOAT:
++   case GLSL_TYPE_BOOL:
++      if (type->is_matrix()) {
++         return type->matrix_columns;
++      } else {
++         /* Regardless of size of vector, it gets a vec4. This is bad
++          * packing for things like floats, but otherwise arrays become a
++          * mess. Hopefully a later pass over the code can pack scalars
++          * down if appropriate.
++          */
++         return 1;
++      }
++   case GLSL_TYPE_ARRAY:
++      return type_size(type->fields.array) * type->length;
++   case GLSL_TYPE_STRUCT:
++      size = 0;
++      for (i = 0; i < type->length; i++) {
++         size += type_size(type->fields.structure[i].type);
++      }
++      return size;
++   default:
++      assert(0);
++      /* Keep the return value defined when asserts are compiled out. */
++      return 0;
++   }
++}
++
++/**
++ * In the initial pass of codegen, we assign temporary numbers to
++ * intermediate results. (not SSA -- variable assignments will reuse
++ * storage). Actual register allocation for the Mesa VM occurs in a
++ * pass over the Mesa IR later.
++ */
++ir_to_mesa_src_reg
++ir_to_mesa_visitor::get_temp(const glsl_type *type)
++{
++   ir_to_mesa_src_reg src_reg;
++   int swizzle[4];
++   int i;
++
++   assert(!type->is_array());
++
++   src_reg.file = PROGRAM_TEMPORARY;
++   src_reg.index = next_temp;
++   src_reg.reladdr = NULL;
++   next_temp += type_size(type);
++
++   for (i = 0; i < type->vector_elements; i++)
++      swizzle[i] = i;
++   for (; i < 4; i++)
++      swizzle[i] = type->vector_elements - 1;
++   src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
++                                   swizzle[2], swizzle[3]);
++   src_reg.negate = 0;
++
++   return src_reg;
++}
++
++/* Linear search of the variable list; returns NULL when \c var has no entry. */
++variable_storage *
++ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
++{
++
++   variable_storage *entry;
++
++   foreach_iter(exec_list_iterator, iter, this->variables) {
++      entry = (variable_storage *)iter.get();
++
++      if (entry->var == var)
++         return entry;
++   }
++
++   return NULL;
++}
++
++void
++ir_to_mesa_visitor::visit(ir_variable *ir)
++{
++   (void)ir;
++}
++
++void
++ir_to_mesa_visitor::visit(ir_loop *ir)
++{
++   assert(!ir->from);
++   assert(!ir->to);
++   assert(!ir->increment);
++   assert(!ir->counter);
++
++   ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
++   visit_exec_list(&ir->body_instructions, this);
++   ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
++}
++
++void
++ir_to_mesa_visitor::visit(ir_loop_jump *ir)
++{
++   switch (ir->mode) {
++   case ir_loop_jump::jump_break:
++      ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
++      break;
++   case ir_loop_jump::jump_continue:
++      ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
++      break;
++   }
++}
++
++
++void
++ir_to_mesa_visitor::visit(ir_function_signature *ir)
++{
++   assert(0);
++   (void)ir;
++}
++
++void
++ir_to_mesa_visitor::visit(ir_function *ir)
++{
++   /* Ignore function bodies other than main() -- we shouldn't see calls to
++    * them since they should all be inlined before we get to ir_to_mesa.
++    */
++   if (strcmp(ir->name, "main") == 0) {
++      const ir_function_signature *sig;
++      exec_list empty;
++
++      sig = ir->matching_signature(&empty);
++
++      assert(sig);
++
++      foreach_iter(exec_list_iterator, iter, sig->body) {
++         /* Named body_ir so it does not shadow the function parameter. */
++         ir_instruction *body_ir = (ir_instruction *)iter.get();
++
++         body_ir->accept(this);
++      }
++   }
++}
++
++/**
++ * If operand \c mul_operand of \c ir is a multiply, emit a single
++ * OPCODE_MAD for the whole expression and return true; otherwise emit
++ * nothing and return false.
++ */
++GLboolean
++ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
++{
++   int nonmul_operand = 1 - mul_operand;
++   ir_to_mesa_src_reg a, b, c;
++
++   ir_expression *expr = ir->operands[mul_operand]->as_expression();
++   if (!expr || expr->operation != ir_binop_mul)
++      return false;
++
++   expr->operands[0]->accept(this);
++   a = this->result;
++   expr->operands[1]->accept(this);
++   b = this->result;
++   ir->operands[nonmul_operand]->accept(this);
++   c = this->result;
++
++   this->result = get_temp(ir->type);
++   ir_to_mesa_emit_op3(ir, OPCODE_MAD,
++                       ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
++
++   return true;
++}
++
++/**
++ * Load the relative address of \c reg into the address register.  Unless
++ * this is the last outstanding relative address (\c *num_reladdr == 1),
++ * also copy the addressed value into a fresh temporary and make \c reg
++ * refer to that temporary.  Decrements \c *num_reladdr.  Does nothing when
++ * \c reg has no relative address.
++ */
++void
++ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
++                                    ir_to_mesa_src_reg *reg, int *num_reladdr)
++{
++   if (!reg->reladdr)
++      return;
++
++   ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
++
++   if (*num_reladdr != 1) {
++      ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
++
++      ir_to_mesa_emit_op1(ir, OPCODE_MOV,
++                          ir_to_mesa_dst_reg_from_src(temp), *reg);
++      *reg = temp;
++   }
++
++   (*num_reladdr)--;
++}
++
++void
++ir_to_mesa_visitor::visit(ir_expression *ir)
++{
++   unsigned int operand;
++   struct ir_to_mesa_src_reg op[2];
++   struct ir_to_mesa_src_reg result_src;
++   struct
ir_to_mesa_dst_reg result_dst; ++ const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1); ++ const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1); ++ const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1); ++ ++ /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) ++ */ ++ if (ir->operation == ir_binop_add) { ++ if (try_emit_mad(ir, 1)) ++ return; ++ if (try_emit_mad(ir, 0)) ++ return; ++ } ++ ++ for (operand = 0; operand < ir->get_num_operands(); operand++) { ++ this->result.file = PROGRAM_UNDEFINED; ++ ir->operands[operand]->accept(this); ++ if (this->result.file == PROGRAM_UNDEFINED) { ++ ir_print_visitor v; ++ printf("Failed to get tree for expression operand:\n"); ++ ir->operands[operand]->accept(&v); ++ exit(1); ++ } ++ op[operand] = this->result; ++ ++ /* Matrix expression operands should have been broken down to vector ++ * operations already. ++ */ ++ assert(!ir->operands[operand]->type->is_matrix()); ++ } ++ ++ this->result.file = PROGRAM_UNDEFINED; ++ ++ /* Storage for our result. Ideally for an assignment we'd be using ++ * the actual storage for the result here, instead. ++ */ ++ result_src = get_temp(ir->type); ++ /* convenience for the emit functions below. */ ++ result_dst = ir_to_mesa_dst_reg_from_src(result_src); ++ /* Limit writes to the channels that will be used by result_src later. ++ * This does limit this temp's use as a temporary for multi-instruction ++ * sequences. 
++ */ ++ result_dst.writemask = (1 << ir->type->vector_elements) - 1; ++ ++ switch (ir->operation) { ++ case ir_unop_logic_not: ++ ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, ++ op[0], src_reg_for_float(0.0)); ++ break; ++ case ir_unop_neg: ++ op[0].negate = ~op[0].negate; ++ result_src = op[0]; ++ break; ++ case ir_unop_abs: ++ ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]); ++ break; ++ case ir_unop_sign: ++ ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]); ++ break; ++ case ir_unop_rcp: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]); ++ break; ++ ++ case ir_unop_exp: ++ ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, ++ src_reg_for_float(M_E), op[0]); ++ break; ++ case ir_unop_exp2: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]); ++ break; ++ case ir_unop_log: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]); ++ break; ++ case ir_unop_log2: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]); ++ break; ++ case ir_unop_sin: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]); ++ break; ++ case ir_unop_cos: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]); ++ break; ++ ++ case ir_unop_dFdx: ++ ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]); ++ break; ++ case ir_unop_dFdy: ++ ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]); ++ break; ++ ++ case ir_binop_add: ++ ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_sub: ++ ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]); ++ break; ++ ++ case ir_binop_mul: ++ ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_div: ++ assert(!"not reached: should be handled by ir_div_to_mul_rcp"); ++ case ir_binop_mod: ++ assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); ++ break; ++ ++ case ir_binop_less: ++ ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]); ++ break; ++ case 
ir_binop_greater: ++ ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_lequal: ++ ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_gequal: ++ ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_equal: ++ ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_logic_xor: ++ case ir_binop_nequal: ++ ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]); ++ break; ++ ++ case ir_binop_logic_or: ++ /* This could be a saturated add and skip the SNE. */ ++ ir_to_mesa_emit_op2(ir, OPCODE_ADD, ++ result_dst, ++ op[0], op[1]); ++ ++ ir_to_mesa_emit_op2(ir, OPCODE_SNE, ++ result_dst, ++ result_src, src_reg_for_float(0.0)); ++ break; ++ ++ case ir_binop_logic_and: ++ /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ ++ ir_to_mesa_emit_op2(ir, OPCODE_MUL, ++ result_dst, ++ op[0], op[1]); ++ break; ++ ++ case ir_binop_dot: ++ if (ir->operands[0]->type == vec4_type) { ++ assert(ir->operands[1]->type == vec4_type); ++ ir_to_mesa_emit_op2(ir, OPCODE_DP4, ++ result_dst, ++ op[0], op[1]); ++ } else if (ir->operands[0]->type == vec3_type) { ++ assert(ir->operands[1]->type == vec3_type); ++ ir_to_mesa_emit_op2(ir, OPCODE_DP3, ++ result_dst, ++ op[0], op[1]); ++ } else if (ir->operands[0]->type == vec2_type) { ++ assert(ir->operands[1]->type == vec2_type); ++ ir_to_mesa_emit_op2(ir, OPCODE_DP2, ++ result_dst, ++ op[0], op[1]); ++ } ++ break; ++ ++ case ir_binop_cross: ++ ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]); ++ break; ++ ++ case ir_unop_sqrt: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]); ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src); ++ /* For incoming channels < 0, set the result to 0. 
*/ ++ ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst, ++ op[0], src_reg_for_float(0.0), result_src); ++ break; ++ case ir_unop_rsq: ++ ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]); ++ break; ++ case ir_unop_i2f: ++ case ir_unop_b2f: ++ case ir_unop_b2i: ++ /* Mesa IR lacks types, ints are stored as truncated floats. */ ++ result_src = op[0]; ++ break; ++ case ir_unop_f2i: ++ ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]); ++ break; ++ case ir_unop_f2b: ++ case ir_unop_i2b: ++ ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, ++ result_src, src_reg_for_float(0.0)); ++ break; ++ case ir_unop_trunc: ++ ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]); ++ break; ++ case ir_unop_ceil: ++ op[0].negate = ~op[0].negate; ++ ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]); ++ result_src.negate = ~result_src.negate; ++ break; ++ case ir_unop_floor: ++ ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]); ++ break; ++ case ir_unop_fract: ++ ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]); ++ break; ++ ++ case ir_binop_min: ++ ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_max: ++ ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]); ++ break; ++ case ir_binop_pow: ++ ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]); ++ break; ++ ++ case ir_unop_bit_not: ++ case ir_unop_u2f: ++ case ir_binop_lshift: ++ case ir_binop_rshift: ++ case ir_binop_bit_and: ++ case ir_binop_bit_xor: ++ case ir_binop_bit_or: ++ assert(!"GLSL 1.30 features unsupported"); ++ break; ++ } ++ ++ this->result = result_src; ++} ++ ++ ++void ++ir_to_mesa_visitor::visit(ir_swizzle *ir) ++{ ++ ir_to_mesa_src_reg src_reg; ++ int i; ++ int swizzle[4]; ++ ++ /* Note that this is only swizzles in expressions, not those on the left ++ * hand side of an assignment, which do write masking. See ir_assignment ++ * for that. 
++ */ ++ ++ ir->val->accept(this); ++ src_reg = this->result; ++ assert(src_reg.file != PROGRAM_UNDEFINED); ++ ++ for (i = 0; i < 4; i++) { ++ if (i < ir->type->vector_elements) { ++ switch (i) { ++ case 0: ++ swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x); ++ break; ++ case 1: ++ swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y); ++ break; ++ case 2: ++ swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z); ++ break; ++ case 3: ++ swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w); ++ break; ++ } ++ } else { ++ /* If the type is smaller than a vec4, replicate the last ++ * channel out. ++ */ ++ swizzle[i] = swizzle[ir->type->vector_elements - 1]; ++ } ++ } ++ ++ src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], ++ swizzle[1], ++ swizzle[2], ++ swizzle[3]); ++ ++ this->result = src_reg; ++} ++ ++static int ++add_matrix_ref(struct gl_program *prog, int *tokens) ++{ ++ int base_pos = -1; ++ int i; ++ ++ /* Add a ref for each column. It looks like the reason we do ++ * it this way is that _mesa_add_state_reference doesn't work ++ * for things that aren't vec4s, so the tokens[2]/tokens[3] ++ * range has to be equal. ++ */ ++ for (i = 0; i < 4; i++) { ++ tokens[2] = i; ++ tokens[3] = i; ++ int pos = _mesa_add_state_reference(prog->Parameters, ++ (gl_state_index *)tokens); ++ if (base_pos == -1) ++ base_pos = pos; ++ else ++ assert(base_pos + i == pos); ++ } ++ ++ return base_pos; ++} ++ ++static variable_storage * ++get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var, ++ ir_rvalue *array_index) ++{ ++ /* ++ * NOTE: The ARB_vertex_program extension specified that matrices get ++ * loaded in registers in row-major order. With GLSL, we want column- ++ * major order. So, we need to transpose all matrices here... 
++ */ ++ static const struct { ++ const char *name; ++ int matrix; ++ int modifier; ++ } matrices[] = { ++ { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE }, ++ { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS }, ++ { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 }, ++ { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE }, ++ ++ { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE }, ++ { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS }, ++ { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 }, ++ { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE }, ++ ++ { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE }, ++ { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS }, ++ { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 }, ++ { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE }, ++ ++ { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE }, ++ { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS }, ++ { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 }, ++ { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE }, ++ ++ { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE }, ++ ++ }; ++ unsigned int i; ++ variable_storage *entry; ++ ++ /* C++ gets angry when we try to use an int as a gl_state_index, so we use ++ * ints for gl_state_index. Make sure they're compatible. 
++ */ ++ assert(sizeof(gl_state_index) == sizeof(int)); ++ ++ for (i = 0; i < Elements(matrices); i++) { ++ if (strcmp(var->name, matrices[i].name) == 0) { ++ int tokens[STATE_LENGTH]; ++ int base_pos = -1; ++ ++ tokens[0] = matrices[i].matrix; ++ tokens[4] = matrices[i].modifier; ++ if (matrices[i].matrix == STATE_TEXTURE_MATRIX) { ++ ir_constant *index = array_index->constant_expression_value(); ++ if (index) { ++ tokens[1] = index->value.i[0]; ++ base_pos = add_matrix_ref(prog, tokens); ++ } else { ++ for (i = 0; i < var->type->length; i++) { ++ tokens[1] = i; ++ int pos = add_matrix_ref(prog, tokens); ++ if (base_pos == -1) ++ base_pos = pos; ++ else ++ assert(base_pos + (int)i * 4 == pos); ++ } ++ } ++ } else { ++ tokens[1] = 0; /* unused array index */ ++ base_pos = add_matrix_ref(prog, tokens); ++ } ++ tokens[4] = matrices[i].modifier; ++ ++ entry = new(mem_ctx) variable_storage(var, ++ PROGRAM_STATE_VAR, ++ base_pos); ++ ++ return entry; ++ } ++ } ++ ++ return NULL; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_dereference_variable *ir) ++{ ++ ir_to_mesa_src_reg src_reg; ++ variable_storage *entry = find_variable_storage(ir->var); ++ unsigned int loc; ++ ++ if (!entry) { ++ switch (ir->var->mode) { ++ case ir_var_uniform: ++ entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var, ++ NULL); ++ if (entry) ++ break; ++ ++ /* FINISHME: Fix up uniform name for arrays and things */ ++ if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) { ++ /* FINISHME: we whack the location of the var here, which ++ * is probably not expected. But we need to communicate ++ * mesa's sampler number to the tex instruction. 
++ */ ++ int sampler = _mesa_add_sampler(this->prog->Parameters, ++ ir->var->name, ++ ir->var->type->gl_type); ++ map_sampler(ir->var->location, sampler); ++ ++ entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER, ++ sampler); ++ this->variables.push_tail(entry); ++ break; ++ } ++ ++ assert(ir->var->type->gl_type != 0 && ++ ir->var->type->gl_type != GL_INVALID_ENUM); ++ loc = _mesa_add_uniform(this->prog->Parameters, ++ ir->var->name, ++ type_size(ir->var->type) * 4, ++ ir->var->type->gl_type, ++ NULL); ++ ++ /* Always mark the uniform used at this point. If it isn't ++ * used, dead code elimination should have nuked the decl already. ++ */ ++ this->prog->Parameters->Parameters[loc].Used = GL_TRUE; ++ ++ entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc); ++ this->variables.push_tail(entry); ++ break; ++ case ir_var_in: ++ case ir_var_out: ++ case ir_var_inout: ++ /* The linker assigns locations for varyings and attributes, ++ * including deprecated builtins (like gl_Color), user-assign ++ * generic attributes (glBindVertexLocation), and ++ * user-defined varyings. ++ * ++ * FINISHME: We would hit this path for function arguments. Fix! 
++ */ ++ assert(ir->var->location != -1); ++ if (ir->var->mode == ir_var_in || ++ ir->var->mode == ir_var_inout) { ++ entry = new(mem_ctx) variable_storage(ir->var, ++ PROGRAM_INPUT, ++ ir->var->location); ++ ++ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && ++ ir->var->location >= VERT_ATTRIB_GENERIC0) { ++ _mesa_add_attribute(prog->Attributes, ++ ir->var->name, ++ type_size(ir->var->type) * 4, ++ ir->var->type->gl_type, ++ ir->var->location - VERT_ATTRIB_GENERIC0); ++ } ++ } else { ++ entry = new(mem_ctx) variable_storage(ir->var, ++ PROGRAM_OUTPUT, ++ ir->var->location); ++ } ++ ++ break; ++ case ir_var_auto: ++ case ir_var_temporary: ++ entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY, ++ this->next_temp); ++ this->variables.push_tail(entry); ++ ++ next_temp += type_size(ir->var->type); ++ break; ++ } ++ ++ if (!entry) { ++ printf("Failed to make storage for %s\n", ir->var->name); ++ exit(1); ++ } ++ } ++ ++ src_reg.file = entry->file; ++ src_reg.index = entry->index; ++ /* If the type is smaller than a vec4, replicate the last channel out. 
*/ ++ if (ir->type->is_scalar() || ir->type->is_vector()) ++ src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements); ++ else ++ src_reg.swizzle = SWIZZLE_NOOP; ++ src_reg.reladdr = NULL; ++ src_reg.negate = 0; ++ ++ this->result = src_reg; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_dereference_array *ir) ++{ ++ ir_constant *index; ++ ir_to_mesa_src_reg src_reg; ++ ir_dereference_variable *deref_var = ir->array->as_dereference_variable(); ++ int element_size = type_size(ir->type); ++ ++ index = ir->array_index->constant_expression_value(); ++ ++ if (deref_var && strncmp(deref_var->var->name, ++ "gl_TextureMatrix", ++ strlen("gl_TextureMatrix")) == 0) { ++ ir_to_mesa_src_reg src_reg; ++ struct variable_storage *entry; ++ ++ entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var, ++ ir->array_index); ++ assert(entry); ++ ++ src_reg.file = entry->file; ++ src_reg.index = entry->index; ++ src_reg.swizzle = swizzle_for_size(ir->type->vector_elements); ++ src_reg.negate = 0; ++ ++ if (index) { ++ src_reg.reladdr = NULL; ++ } else { ++ ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type); ++ ++ ir->array_index->accept(this); ++ ir_to_mesa_emit_op2(ir, OPCODE_MUL, ++ ir_to_mesa_dst_reg_from_src(index_reg), ++ this->result, src_reg_for_float(element_size)); ++ ++ src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg); ++ memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg)); ++ } ++ ++ this->result = src_reg; ++ return; ++ } ++ ++ ir->array->accept(this); ++ src_reg = this->result; ++ ++ if (index) { ++ src_reg.index += index->value.i[0] * element_size; ++ } else { ++ ir_to_mesa_src_reg array_base = this->result; ++ /* Variable index array dereference. It eats the "vec4" of the ++ * base of the array and an index that offsets the Mesa register ++ * index. 
++ */ ++ ir->array_index->accept(this); ++ ++ ir_to_mesa_src_reg index_reg; ++ ++ if (element_size == 1) { ++ index_reg = this->result; ++ } else { ++ index_reg = get_temp(glsl_type::float_type); ++ ++ ir_to_mesa_emit_op2(ir, OPCODE_MUL, ++ ir_to_mesa_dst_reg_from_src(index_reg), ++ this->result, src_reg_for_float(element_size)); ++ } ++ ++ src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg); ++ memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg)); ++ } ++ ++ /* If the type is smaller than a vec4, replicate the last channel out. */ ++ if (ir->type->is_scalar() || ir->type->is_vector()) ++ src_reg.swizzle = swizzle_for_size(ir->type->vector_elements); ++ else ++ src_reg.swizzle = SWIZZLE_NOOP; ++ ++ this->result = src_reg; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_dereference_record *ir) ++{ ++ unsigned int i; ++ const glsl_type *struct_type = ir->record->type; ++ int offset = 0; ++ ++ ir->record->accept(this); ++ ++ for (i = 0; i < struct_type->length; i++) { ++ if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) ++ break; ++ offset += type_size(struct_type->fields.structure[i].type); ++ } ++ this->result.swizzle = swizzle_for_size(ir->type->vector_elements); ++ this->result.index += offset; ++} ++ ++/** ++ * We want to be careful in assignment setup to hit the actual storage ++ * instead of potentially using a temporary like we might with the ++ * ir_dereference handler. ++ * ++ * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we ++ * should only see potentially one variable array index of a vector, ++ * and one swizzle, before getting to actual vec4 storage. So handle ++ * those, then go use ir_dereference to handle the rest. 
++ */ ++static struct ir_to_mesa_dst_reg ++get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v, ++ ir_to_mesa_src_reg *r) ++{ ++ struct ir_to_mesa_dst_reg dst_reg; ++ ir_swizzle *swiz; ++ ++ ir_dereference_array *deref_array = ir->as_dereference_array(); ++ /* This should have been handled by ir_vec_index_to_cond_assign */ ++ if (deref_array) { ++ assert(!deref_array->array->type->is_vector()); ++ } ++ ++ /* Use the rvalue deref handler for the most part. We'll ignore ++ * swizzles in it and write swizzles using writemask, though. ++ */ ++ ir->accept(v); ++ dst_reg = ir_to_mesa_dst_reg_from_src(v->result); ++ ++ if ((swiz = ir->as_swizzle())) { ++ int swizzles[4] = { ++ swiz->mask.x, ++ swiz->mask.y, ++ swiz->mask.z, ++ swiz->mask.w ++ }; ++ int new_r_swizzle[4]; ++ int orig_r_swizzle = r->swizzle; ++ int i; ++ ++ for (i = 0; i < 4; i++) { ++ new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0); ++ } ++ ++ dst_reg.writemask = 0; ++ for (i = 0; i < 4; i++) { ++ if (i < swiz->mask.num_components) { ++ dst_reg.writemask |= 1 << swizzles[i]; ++ new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i); ++ } ++ } ++ ++ r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0], ++ new_r_swizzle[1], ++ new_r_swizzle[2], ++ new_r_swizzle[3]); ++ } ++ ++ return dst_reg; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_assignment *ir) ++{ ++ struct ir_to_mesa_dst_reg l; ++ struct ir_to_mesa_src_reg r; ++ int i; ++ ++ assert(!ir->lhs->type->is_array()); ++ ++ ir->rhs->accept(this); ++ r = this->result; ++ ++ l = get_assignment_lhs(ir->lhs, this, &r); ++ ++ assert(l.file != PROGRAM_UNDEFINED); ++ assert(r.file != PROGRAM_UNDEFINED); ++ ++ if (ir->condition) { ++ ir_to_mesa_src_reg condition; ++ ++ ir->condition->accept(this); ++ condition = this->result; ++ ++ /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, ++ * and the condition we produced is 0.0 or 1.0. By flipping the ++ * sign, we can choose which value OPCODE_CMP produces without ++ * an extra computing the condition. 
++ */ ++ condition.negate = ~condition.negate; ++ for (i = 0; i < type_size(ir->lhs->type); i++) { ++ ir_to_mesa_emit_op3(ir, OPCODE_CMP, l, ++ condition, r, ir_to_mesa_src_reg_from_dst(l)); ++ l.index++; ++ r.index++; ++ } ++ } else { ++ for (i = 0; i < type_size(ir->lhs->type); i++) { ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); ++ l.index++; ++ r.index++; ++ } ++ } ++} ++ ++ ++void ++ir_to_mesa_visitor::visit(ir_constant *ir) ++{ ++ ir_to_mesa_src_reg src_reg; ++ GLfloat stack_vals[4]; ++ GLfloat *values = stack_vals; ++ unsigned int i; ++ ++ if (ir->type->is_array()) { ++ ir->print(); ++ printf("\n"); ++ assert(!"FINISHME: array constants"); ++ } ++ ++ if (ir->type->is_matrix()) { ++ /* Unfortunately, 4 floats is all we can get into ++ * _mesa_add_unnamed_constant. So, make a temp to store the ++ * matrix and move each constant value into it. If we get ++ * lucky, copy propagation will eliminate the extra moves. ++ */ ++ ir_to_mesa_src_reg mat = get_temp(glsl_type::vec4_type); ++ ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat); ++ ++ for (i = 0; i < ir->type->matrix_columns; i++) { ++ src_reg.file = PROGRAM_CONSTANT; ++ ++ assert(ir->type->base_type == GLSL_TYPE_FLOAT); ++ values = &ir->value.f[i * ir->type->vector_elements]; ++ ++ src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters, ++ values, ++ ir->type->vector_elements, ++ &src_reg.swizzle); ++ src_reg.reladdr = NULL; ++ src_reg.negate = 0; ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg); ++ ++ mat_column.index++; ++ } ++ ++ this->result = mat; ++ } ++ ++ src_reg.file = PROGRAM_CONSTANT; ++ switch (ir->type->base_type) { ++ case GLSL_TYPE_FLOAT: ++ values = &ir->value.f[0]; ++ break; ++ case GLSL_TYPE_UINT: ++ for (i = 0; i < ir->type->vector_elements; i++) { ++ values[i] = ir->value.u[i]; ++ } ++ break; ++ case GLSL_TYPE_INT: ++ for (i = 0; i < ir->type->vector_elements; i++) { ++ values[i] = ir->value.i[i]; ++ } ++ break; ++ case GLSL_TYPE_BOOL: ++ for (i = 0; i 
< ir->type->vector_elements; i++) { ++ values[i] = ir->value.b[i]; ++ } ++ break; ++ default: ++ assert(!"Non-float/uint/int/bool constant"); ++ } ++ ++ src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters, ++ values, ir->type->vector_elements, ++ &src_reg.swizzle); ++ src_reg.reladdr = NULL; ++ src_reg.negate = 0; ++ ++ this->result = src_reg; ++} ++ ++function_entry * ++ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig) ++{ ++ function_entry *entry; ++ ++ foreach_iter(exec_list_iterator, iter, this->function_signatures) { ++ entry = (function_entry *)iter.get(); ++ ++ if (entry->sig == sig) ++ return entry; ++ } ++ ++ entry = talloc(mem_ctx, function_entry); ++ entry->sig = sig; ++ entry->sig_id = this->next_signature_id++; ++ entry->bgn_inst = NULL; ++ ++ /* Allocate storage for all the parameters. */ ++ foreach_iter(exec_list_iterator, iter, sig->parameters) { ++ ir_variable *param = (ir_variable *)iter.get(); ++ variable_storage *storage; ++ ++ storage = find_variable_storage(param); ++ assert(!storage); ++ ++ storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, ++ this->next_temp); ++ this->variables.push_tail(storage); ++ ++ this->next_temp += type_size(param->type); ++ break; ++ } ++ ++ if (sig->return_type) { ++ entry->return_reg = get_temp(sig->return_type); ++ } else { ++ entry->return_reg = ir_to_mesa_undef; ++ } ++ ++ this->function_signatures.push_tail(entry); ++ return entry; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_call *ir) ++{ ++ ir_to_mesa_instruction *call_inst; ++ ir_function_signature *sig = ir->get_callee(); ++ function_entry *entry = get_function_signature(sig); ++ int i; ++ ++ /* Process in parameters. 
*/ ++ exec_list_iterator sig_iter = sig->parameters.iterator(); ++ foreach_iter(exec_list_iterator, iter, *ir) { ++ ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ++ ir_variable *param = (ir_variable *)sig_iter.get(); ++ ++ if (param->mode == ir_var_in || ++ param->mode == ir_var_inout) { ++ variable_storage *storage = find_variable_storage(param); ++ assert(storage); ++ ++ param_rval->accept(this); ++ ir_to_mesa_src_reg r = this->result; ++ ++ ir_to_mesa_dst_reg l; ++ l.file = storage->file; ++ l.index = storage->index; ++ l.reladdr = NULL; ++ l.writemask = WRITEMASK_XYZW; ++ l.cond_mask = COND_TR; ++ ++ for (i = 0; i < type_size(param->type); i++) { ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); ++ l.index++; ++ r.index++; ++ } ++ } ++ ++ sig_iter.next(); ++ } ++ assert(!sig_iter.has_next()); ++ ++ /* Emit call instruction */ ++ call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL, ++ ir_to_mesa_undef_dst, ir_to_mesa_undef); ++ call_inst->function = entry; ++ ++ /* Process out parameters. */ ++ sig_iter = sig->parameters.iterator(); ++ foreach_iter(exec_list_iterator, iter, *ir) { ++ ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ++ ir_variable *param = (ir_variable *)sig_iter.get(); ++ ++ if (param->mode == ir_var_out || ++ param->mode == ir_var_inout) { ++ variable_storage *storage = find_variable_storage(param); ++ assert(storage); ++ ++ ir_to_mesa_src_reg r; ++ r.file = storage->file; ++ r.index = storage->index; ++ r.reladdr = NULL; ++ r.swizzle = SWIZZLE_NOOP; ++ r.negate = 0; ++ ++ param_rval->accept(this); ++ ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result); ++ ++ for (i = 0; i < type_size(param->type); i++) { ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); ++ l.index++; ++ r.index++; ++ } ++ } ++ ++ sig_iter.next(); ++ } ++ assert(!sig_iter.has_next()); ++ ++ /* Process return value. 
*/ ++ this->result = entry->return_reg; ++} ++ ++ ++void ++ir_to_mesa_visitor::visit(ir_texture *ir) ++{ ++ ir_to_mesa_src_reg result_src, coord, lod_info = { 0 }, projector; ++ ir_to_mesa_dst_reg result_dst, coord_dst; ++ ir_to_mesa_instruction *inst = NULL; ++ prog_opcode opcode = OPCODE_NOP; ++ ++ ir->coordinate->accept(this); ++ ++ /* Put our coords in a temp. We'll need to modify them for shadow, ++ * projection, or LOD, so the only case we'd use it as is is if ++ * we're doing plain old texturing. Mesa IR optimization should ++ * handle cleaning up our mess in that case. ++ */ ++ coord = get_temp(glsl_type::vec4_type); ++ coord_dst = ir_to_mesa_dst_reg_from_src(coord); ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, ++ this->result); ++ ++ if (ir->projector) { ++ ir->projector->accept(this); ++ projector = this->result; ++ } ++ ++ /* Storage for our result. Ideally for an assignment we'd be using ++ * the actual storage for the result here, instead. ++ */ ++ result_src = get_temp(glsl_type::vec4_type); ++ result_dst = ir_to_mesa_dst_reg_from_src(result_src); ++ ++ switch (ir->op) { ++ case ir_tex: ++ opcode = OPCODE_TEX; ++ break; ++ case ir_txb: ++ opcode = OPCODE_TXB; ++ ir->lod_info.bias->accept(this); ++ lod_info = this->result; ++ break; ++ case ir_txl: ++ opcode = OPCODE_TXL; ++ ir->lod_info.lod->accept(this); ++ lod_info = this->result; ++ break; ++ case ir_txd: ++ case ir_txf: ++ assert(!"GLSL 1.30 features unsupported"); ++ break; ++ } ++ ++ if (ir->projector) { ++ if (opcode == OPCODE_TEX) { ++ /* Slot the projector in as the last component of the coord. */ ++ coord_dst.writemask = WRITEMASK_W; ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector); ++ coord_dst.writemask = WRITEMASK_XYZW; ++ opcode = OPCODE_TXP; ++ } else { ++ ir_to_mesa_src_reg coord_w = coord; ++ coord_w.swizzle = SWIZZLE_WWWW; ++ ++ /* For the other TEX opcodes there's no projective version ++ * since the last slot is taken up by lod info. 
Do the ++ * projective divide now. ++ */ ++ coord_dst.writemask = WRITEMASK_W; ++ ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector); ++ ++ coord_dst.writemask = WRITEMASK_XYZ; ++ ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w); ++ ++ coord_dst.writemask = WRITEMASK_XYZW; ++ coord.swizzle = SWIZZLE_XYZW; ++ } ++ } ++ ++ if (ir->shadow_comparitor) { ++ /* Slot the shadow value in as the second to last component of the ++ * coord. ++ */ ++ ir->shadow_comparitor->accept(this); ++ coord_dst.writemask = WRITEMASK_Z; ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result); ++ coord_dst.writemask = WRITEMASK_XYZW; ++ } ++ ++ if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { ++ /* Mesa IR stores lod or lod bias in the last channel of the coords. */ ++ coord_dst.writemask = WRITEMASK_W; ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info); ++ coord_dst.writemask = WRITEMASK_XYZW; ++ } ++ ++ inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord); ++ ++ if (ir->shadow_comparitor) ++ inst->tex_shadow = GL_TRUE; ++ ++ ir_dereference_variable *sampler = ir->sampler->as_dereference_variable(); ++ assert(sampler); /* FINISHME: sampler arrays */ ++ /* generate the mapping, remove when we generate storage at ++ * declaration time ++ */ ++ sampler->accept(this); ++ ++ inst->sampler = get_sampler_number(sampler->var->location); ++ ++ switch (sampler->type->sampler_dimensionality) { ++ case GLSL_SAMPLER_DIM_1D: ++ inst->tex_target = TEXTURE_1D_INDEX; ++ break; ++ case GLSL_SAMPLER_DIM_2D: ++ inst->tex_target = TEXTURE_2D_INDEX; ++ break; ++ case GLSL_SAMPLER_DIM_3D: ++ inst->tex_target = TEXTURE_3D_INDEX; ++ break; ++ case GLSL_SAMPLER_DIM_CUBE: ++ inst->tex_target = TEXTURE_CUBE_INDEX; ++ break; ++ default: ++ assert(!"FINISHME: other texture targets"); ++ } ++ ++ this->result = result_src; ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_return *ir) ++{ ++ assert(current_function); ++ ++ if (ir->get_value()) { ++ ir_to_mesa_dst_reg l; ++ int i; 
++ ++ ir->get_value()->accept(this); ++ ir_to_mesa_src_reg r = this->result; ++ ++ l = ir_to_mesa_dst_reg_from_src(current_function->return_reg); ++ ++ for (i = 0; i < type_size(current_function->sig->return_type); i++) { ++ ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r); ++ l.index++; ++ r.index++; ++ } ++ } ++ ++ ir_to_mesa_emit_op0(ir, OPCODE_RET); ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_discard *ir) ++{ ++ assert(ir->condition == NULL); /* FINISHME */ ++ ++ ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); ++} ++ ++void ++ir_to_mesa_visitor::visit(ir_if *ir) ++{ ++ ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL; ++ ir_to_mesa_instruction *prev_inst; ++ ++ prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); ++ ++ ir->condition->accept(this); ++ assert(this->result.file != PROGRAM_UNDEFINED); ++ ++ if (ctx->Shader.EmitCondCodes) { ++ cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); ++ ++ /* See if we actually generated any instruction for generating ++ * the condition. If not, then cook up a move to a temp so we ++ * have something to set cond_update on. 
++ */ ++ if (cond_inst == prev_inst) { ++ ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type); ++ cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV, ++ ir_to_mesa_dst_reg_from_src(temp), ++ result); ++ } ++ cond_inst->cond_update = GL_TRUE; ++ ++ if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF); ++ if_inst->dst_reg.cond_mask = COND_NE; ++ } else { ++ if_inst = ir_to_mesa_emit_op1(ir->condition, ++ OPCODE_IF, ir_to_mesa_undef_dst, ++ this->result); ++ } ++ ++ this->instructions.push_tail(if_inst); ++ ++ visit_exec_list(&ir->then_instructions, this); ++ ++ if (!ir->else_instructions.is_empty()) { ++ else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE); ++ visit_exec_list(&ir->else_instructions, this); ++ } ++ ++ if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF, ++ ir_to_mesa_undef_dst, ir_to_mesa_undef); ++} ++ ++ir_to_mesa_visitor::ir_to_mesa_visitor() ++{ ++ result.file = PROGRAM_UNDEFINED; ++ next_temp = 1; ++ next_signature_id = 1; ++ sampler_map = NULL; ++ sampler_map_size = 0; ++ current_function = NULL; ++} ++ ++static struct prog_src_register ++mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg) ++{ ++ struct prog_src_register mesa_reg; ++ ++ mesa_reg.File = reg.file; ++ assert(reg.index < (1 << INST_INDEX_BITS) - 1); ++ mesa_reg.Index = reg.index; ++ mesa_reg.Swizzle = reg.swizzle; ++ mesa_reg.RelAddr = reg.reladdr != NULL; ++ mesa_reg.Negate = reg.negate; ++ mesa_reg.Abs = 0; ++ ++ return mesa_reg; ++} ++ ++static void ++set_branchtargets(ir_to_mesa_visitor *v, ++ struct prog_instruction *mesa_instructions, ++ int num_instructions) ++{ ++ int if_count = 0, loop_count = 0; ++ int *if_stack, *loop_stack; ++ int if_stack_pos = 0, loop_stack_pos = 0; ++ int i, j; ++ ++ for (i = 0; i < num_instructions; i++) { ++ switch (mesa_instructions[i].Opcode) { ++ case OPCODE_IF: ++ if_count++; ++ break; ++ case OPCODE_BGNLOOP: ++ loop_count++; ++ break; ++ case OPCODE_BRK: ++ case OPCODE_CONT: ++ mesa_instructions[i].BranchTarget = 
-1; ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if_stack = (int *)calloc(if_count, sizeof(*if_stack)); ++ loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack)); ++ ++ for (i = 0; i < num_instructions; i++) { ++ switch (mesa_instructions[i].Opcode) { ++ case OPCODE_IF: ++ if_stack[if_stack_pos] = i; ++ if_stack_pos++; ++ break; ++ case OPCODE_ELSE: ++ mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; ++ if_stack[if_stack_pos - 1] = i; ++ break; ++ case OPCODE_ENDIF: ++ mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; ++ if_stack_pos--; ++ break; ++ case OPCODE_BGNLOOP: ++ loop_stack[loop_stack_pos] = i; ++ loop_stack_pos++; ++ break; ++ case OPCODE_ENDLOOP: ++ loop_stack_pos--; ++ /* Rewrite any breaks/conts at this nesting level (haven't ++ * already had a BranchTarget assigned) to point to the end ++ * of the loop. ++ */ ++ for (j = loop_stack[loop_stack_pos]; j < i; j++) { ++ if (mesa_instructions[j].Opcode == OPCODE_BRK || ++ mesa_instructions[j].Opcode == OPCODE_CONT) { ++ if (mesa_instructions[j].BranchTarget == -1) { ++ mesa_instructions[j].BranchTarget = i; ++ } ++ } ++ } ++ /* The loop ends point at each other. 
*/ ++ mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; ++ mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; ++ break; ++ case OPCODE_CAL: ++ foreach_iter(exec_list_iterator, iter, v->function_signatures) { ++ function_entry *entry = (function_entry *)iter.get(); ++ ++ if (entry->sig_id == mesa_instructions[i].BranchTarget) { ++ mesa_instructions[i].BranchTarget = entry->inst; ++ break; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ ++ free(if_stack); ++} ++ ++static void ++print_program(struct prog_instruction *mesa_instructions, ++ ir_instruction **mesa_instruction_annotation, ++ int num_instructions) ++{ ++ ir_instruction *last_ir = NULL; ++ int i; ++ int indent = 0; ++ ++ for (i = 0; i < num_instructions; i++) { ++ struct prog_instruction *mesa_inst = mesa_instructions + i; ++ ir_instruction *ir = mesa_instruction_annotation[i]; ++ ++ fprintf(stdout, "%3d: ", i); ++ ++ if (last_ir != ir && ir) { ++ int j; ++ ++ for (j = 0; j < indent; j++) { ++ fprintf(stdout, " "); ++ } ++ ir->print(); ++ printf("\n"); ++ last_ir = ir; ++ ++ fprintf(stdout, " "); /* line number spacing. 
*/ ++ } ++ ++ indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, ++ PROG_PRINT_DEBUG, NULL); ++ } ++} ++ ++static void ++mark_input(struct gl_program *prog, ++ int index, ++ GLboolean reladdr) ++{ ++ prog->InputsRead |= BITFIELD64_BIT(index); ++ int i; ++ ++ if (reladdr) { ++ if (index >= FRAG_ATTRIB_TEX0 && index <= FRAG_ATTRIB_TEX7) { ++ for (i = 0; i < 8; i++) { ++ prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i); ++ } ++ } else { ++ assert(!"FINISHME: Mark InputsRead for varying arrays"); ++ } ++ } ++} ++ ++static void ++mark_output(struct gl_program *prog, ++ int index, ++ GLboolean reladdr) ++{ ++ prog->OutputsWritten |= BITFIELD64_BIT(index); ++ int i; ++ ++ if (reladdr) { ++ if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) { ++ for (i = 0; i < 8; i++) { ++ prog->OutputsWritten |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i); ++ } ++ } else { ++ assert(!"FINISHME: Mark OutputsWritten for varying arrays"); ++ } ++ } ++} ++ ++static void ++count_resources(struct gl_program *prog) ++{ ++ unsigned int i; ++ ++ prog->InputsRead = 0; ++ prog->OutputsWritten = 0; ++ prog->SamplersUsed = 0; ++ ++ for (i = 0; i < prog->NumInstructions; i++) { ++ struct prog_instruction *inst = &prog->Instructions[i]; ++ unsigned int reg; ++ ++ switch (inst->DstReg.File) { ++ case PROGRAM_OUTPUT: ++ mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr); ++ break; ++ case PROGRAM_INPUT: ++ mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr); ++ break; ++ default: ++ break; ++ } ++ ++ for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) { ++ switch (inst->SrcReg[reg].File) { ++ case PROGRAM_OUTPUT: ++ mark_output(prog, inst->SrcReg[reg].Index, ++ inst->SrcReg[reg].RelAddr); ++ break; ++ case PROGRAM_INPUT: ++ mark_input(prog, inst->SrcReg[reg].Index, inst->SrcReg[reg].RelAddr); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ /* Instead of just using the uniform's value to map to a ++ * sampler, Mesa first allocates a separate number for 
the
       * sampler (_mesa_add_sampler), then we reindex it down to a
       * small integer (sampler_map[], SamplersUsed), then that gets
       * mapped to the uniform's value, and we get an actual sampler.
       */
      if (_mesa_is_tex_instruction(inst->Opcode)) {
         prog->SamplerTargets[inst->TexSrcUnit] =
            (gl_texture_index)inst->TexSrcTarget;
         prog->SamplersUsed |= 1 << inst->TexSrcUnit;
         if (inst->TexShadow) {
            prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
         }
      }
   }

   _mesa_update_shader_textures_used(prog);
}

/* Each stage has some uniforms in its Parameters list.  The Uniforms
 * list for the linked shader program has a pointer to these uniforms
 * in each of the stage's Parameters list, so that their values can be
 * updated when a uniform is set.
 */
static void
link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
                                     struct gl_program *prog)
{
   unsigned int i;

   for (i = 0; i < prog->Parameters->NumParameters; i++) {
      const struct gl_program_parameter *p = prog->Parameters->Parameters + i;

      /* Only uniform and sampler parameters are appended to the shared list. */
      if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
         struct gl_uniform *uniform =
            _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
         if (uniform)
            uniform->Initialized = p->Initialized;
      }
   }
}

/**
 * Translate one linked shader's GLSL IR into a Mesa gl_program.
 *
 * The visitor emits its own instruction list for main() and then for every
 * function signature that was referenced.  That list is copied into a
 * calloc'ed prog_instruction array, branch targets are resolved, and the
 * program is attached to \p shader.
 *
 * \return the new program, or NULL if ctx->Driver.NewProgram() fails.
 */
struct gl_program *
get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
                 struct gl_shader *shader)
{
   void *mem_ctx = shader_program;
   ir_to_mesa_visitor v;
   struct prog_instruction *mesa_instructions, *mesa_inst;
   ir_instruction **mesa_instruction_annotation;
   int i;
   struct gl_program *prog;
   GLenum target;
   const char *target_string;
   GLboolean progress;

   /* NOTE(review): target and target_string stay uninitialized on the
    * default path if the assert is compiled out.
    */
   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      target = GL_VERTEX_PROGRAM_ARB;
      target_string = "vertex";
      break;
   case GL_FRAGMENT_SHADER:
      target = GL_FRAGMENT_PROGRAM_ARB;
      target_string = "fragment";
      break;
   default:
      assert(!"should not be reached");
      break;
   }

   validate_ir_tree(shader->ir);

   prog = ctx->Driver.NewProgram(ctx, target, 1);
   if (!prog)
      return NULL;
   prog->Parameters = _mesa_new_parameter_list();
   prog->Varying = _mesa_new_parameter_list();
   prog->Attributes = _mesa_new_parameter_list();
   v.ctx = ctx;
   v.prog = prog;

   /* NOTE(review): no matching free is visible here; presumably the
    * visitor releases this context elsewhere -- confirm.
    */
   v.mem_ctx = talloc_new(NULL);

   /* Emit Mesa IR for main(). */
   visit_exec_list(shader->ir, &v);
   v.ir_to_mesa_emit_op0(NULL, OPCODE_END);

   /* Now emit bodies for any functions that were used. */
   do {
      progress = GL_FALSE;

      foreach_iter(exec_list_iterator, iter, v.function_signatures) {
         function_entry *entry = (function_entry *)iter.get();

         /* A NULL bgn_inst marks a signature whose body is not emitted yet.
          * Visiting a body may register further signatures, hence the
          * enclosing do/while.
          */
         if (!entry->bgn_inst) {
            v.current_function = entry;

            entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
            entry->bgn_inst->function = entry;

            visit_exec_list(&entry->sig->body, &v);

            /* bgn_inst is overwritten by the RET and ENDSUB instructions;
             * within this function it is only tested for non-NULL.
             */
            entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
            entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
            progress = GL_TRUE;
         }
      }
   } while (progress);

   prog->NumTemporaries = v.next_temp;

   /* Count the emitted instructions to size the output arrays. */
   int num_instructions = 0;
   foreach_iter(exec_list_iterator, iter, v.instructions) {
      num_instructions++;
   }

   mesa_instructions =
      (struct prog_instruction *)calloc(num_instructions,
                                        sizeof(*mesa_instructions));
   mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
                                              num_instructions);

   /* Copy each visitor instruction into its prog_instruction slot. */
   mesa_inst = mesa_instructions;
   i = 0;
   foreach_iter(exec_list_iterator, iter, v.instructions) {
      ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();

      mesa_inst->Opcode = inst->op;
      mesa_inst->CondUpdate = inst->cond_update;
      mesa_inst->DstReg.File = inst->dst_reg.file;
      mesa_inst->DstReg.Index = inst->dst_reg.index;
      mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
      mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
      mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
      mesa_inst->TexSrcUnit = inst->sampler;
      mesa_inst->TexSrcTarget = inst->tex_target;
      mesa_inst->TexShadow = inst->tex_shadow;
      mesa_instruction_annotation[i] = inst->ir;

      /* An IF that survived lowering is a link error when EmitNoIfs is set. */
      if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
         shader_program->InfoLog =
            talloc_asprintf_append(shader_program->InfoLog,
                                   "Couldn't flatten if statement\n");
         shader_program->LinkStatus = false;
      }

      if (mesa_inst->Opcode == OPCODE_BGNSUB)
         inst->function->inst = i;
      else if (mesa_inst->Opcode == OPCODE_CAL)
         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
      else if (mesa_inst->Opcode == OPCODE_ARL)
         prog->NumAddressRegs = 1;

      mesa_inst++;
      i++;
   }

   set_branchtargets(&v, mesa_instructions, num_instructions);
   if (ctx->Shader.Flags & GLSL_DUMP) {
      printf("Mesa %s program:\n", target_string);
      print_program(mesa_instructions, mesa_instruction_annotation,
                    num_instructions);
   }

   prog->Instructions = mesa_instructions;
   prog->NumInstructions = num_instructions;

   _mesa_reference_program(ctx, &shader->Program, prog);

   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
      _mesa_optimize_program(ctx, prog);
   }

   return prog;
}

extern "C" {

/**
 * Compile one shader: preprocess, lex/parse, convert the AST to IR, then
 * run the lowering passes and loop the optimization passes until none of
 * them reports progress.  Results are stored back into \p shader.
 */
void
_mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   struct _mesa_glsl_parse_state *state =
      new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);

   const char *source = shader->Source;
   state->error = preprocess(state, &source, &state->info_log,
                             &ctx->Extensions);

   if (!state->error) {
      _mesa_glsl_lexer_ctor(state, source);
      _mesa_glsl_parse(state);
      _mesa_glsl_lexer_dtor(state);
   }

   shader->ir = new(shader) exec_list;
   if (!state->error && !state->translation_unit.is_empty())
      _mesa_ast_to_hir(shader->ir, state);

   if (!state->error && !shader->ir->is_empty()) {
      validate_ir_tree(shader->ir);

      /* Lowering */
      do_mat_op_to_vec(shader->ir);
      do_mod_to_fract(shader->ir);
      do_div_to_mul_rcp(shader->ir);

      /* Optimization passes */
      bool progress;
      do {
         progress = false;

         /* Each pass is on the left of ||, so it always runs. */
         progress = do_function_inlining(shader->ir) || progress;
         progress = do_if_simplification(shader->ir) || progress;
         progress = do_copy_propagation(shader->ir) || progress;
         progress = do_dead_code_local(shader->ir) || progress;
         progress = do_dead_code_unlinked(state, shader->ir) || progress;
         progress = do_constant_variable_unlinked(shader->ir) || progress;
         progress = do_constant_folding(shader->ir) || progress;
         progress = do_if_return(shader->ir) || progress;
         if (ctx->Shader.EmitNoIfs)
            progress = do_if_to_cond_assign(shader->ir) || progress;

         progress = do_vec_index_to_swizzle(shader->ir) || progress;
         /* Do this one after the previous to let the easier pass handle
          * constant vector indexing.
          */
         progress = do_vec_index_to_cond_assign(shader->ir) || progress;

         progress = do_swizzle_swizzle(shader->ir) || progress;
      } while (progress);

      validate_ir_tree(shader->ir);
   }

   shader->symbols = state->symbols;

   shader->CompileStatus = !state->error;
   shader->InfoLog = state->info_log;
   shader->Version = state->language_version;
   memcpy(shader->builtins_to_link, state->builtins_to_link,
          sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
   shader->num_builtins_to_link = state->num_builtins_to_link;

   /* Retain any live IR, but trash the rest.
*/ ++ reparent_ir(shader->ir, shader); ++ ++ talloc_free(state); ++ } ++ ++void ++_mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog) ++{ ++ unsigned int i; ++ ++ _mesa_clear_shader_program_data(ctx, prog); ++ ++ prog->LinkStatus = GL_TRUE; ++ ++ for (i = 0; i < prog->NumShaders; i++) { ++ if (!prog->Shaders[i]->CompileStatus) { ++ prog->InfoLog = ++ talloc_asprintf_append(prog->InfoLog, ++ "linking with uncompiled shader"); ++ prog->LinkStatus = GL_FALSE; ++ } ++ } ++ ++ prog->Varying = _mesa_new_parameter_list(); ++ _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); ++ _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); ++ ++ if (prog->LinkStatus) { ++ link_shaders(prog); ++ ++ /* We don't use the linker's uniforms list, and cook up our own at ++ * generate time. ++ */ ++ free(prog->Uniforms); ++ prog->Uniforms = _mesa_new_uniform_list(); ++ } ++ ++ if (prog->LinkStatus) { ++ for (i = 0; i < prog->_NumLinkedShaders; i++) { ++ struct gl_program *linked_prog; ++ ++ linked_prog = get_mesa_program(ctx, prog, ++ prog->_LinkedShaders[i]); ++ count_resources(linked_prog); ++ ++ link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog); ++ ++ switch (prog->_LinkedShaders[i]->Type) { ++ case GL_VERTEX_SHADER: ++ _mesa_reference_vertprog(ctx, &prog->VertexProgram, ++ (struct gl_vertex_program *)linked_prog); ++ ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, ++ linked_prog); ++ break; ++ case GL_FRAGMENT_SHADER: ++ _mesa_reference_fragprog(ctx, &prog->FragmentProgram, ++ (struct gl_fragment_program *)linked_prog); ++ ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, ++ linked_prog); ++ break; ++ } ++ } ++ } ++} ++ ++} /* extern "C" */ diff --cc src/mesa/program/ir_to_mesa.h index 00000000000,00000000000..e832f84e754 new file mode 100644 --- /dev/null +++ b/src/mesa/program/ir_to_mesa.h @@@ -1,0 -1,0 +1,36 @@@ ++/* ++ * Copyright © 2010 Intel Corporation ++ * ++ * Permission is hereby granted, free of 
charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the next ++ * paragraph) shall be included in all copies or substantial portions of the ++ * Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ * DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include "main/config.h" ++#include "main/mtypes.h" ++ ++void _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *sh); ++void _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog); ++ ++#ifdef __cplusplus ++} ++#endif diff --cc src/mesa/program/prog_execute.c index 00000000000,f85c6513f31..b6da3449b26 mode 000000,100644..100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@@ -1,0 -1,1798 +1,1802 @@@ + /* + * Mesa 3-D graphics library + * Version: 7.3 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + /** + * \file prog_execute.c + * Software interpreter for vertex/fragment programs. + * \author Brian Paul + */ + + /* + * NOTE: we do everything in single-precision floating point; we don't + * currently observe the single/half/fixed-precision qualifiers. + * + */ + + + #include "main/glheader.h" + #include "main/colormac.h" + #include "main/context.h" + #include "prog_execute.h" + #include "prog_instruction.h" + #include "prog_parameter.h" + #include "prog_print.h" + #include "prog_noise.h" + + + /* debug predicate */ + #define DEBUG_PROG 0 + + + /** + * Set x to positive or negative infinity. 
 */
#if defined(USE_IEEE) || defined(_WIN32)
/* Write the infinity bit patterns through the fi_type union. */
#define SET_POS_INFINITY(x)                   \
   do {                                       \
         fi_type fi;                          \
         fi.i = 0x7F800000;                   \
         x = fi.f;                            \
   } while (0)
#define SET_NEG_INFINITY(x)                   \
   do {                                       \
         fi_type fi;                          \
         fi.i = 0xFF800000;                   \
         x = fi.f;                            \
   } while (0)
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif

/* Store a raw bit pattern into the float lvalue x. */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits


/* Returned by get_src_register_pointer() for out-of-range register reads. */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };



/**
 * Return a pointer to the 4-element float vector specified by the given
 * source register.
 *
 * Out-of-range indices (including a negative index after relative
 * addressing) yield ZeroVec.  An unknown register file reports a problem
 * and returns NULL.
 */
static INLINE const GLfloat *
get_src_register_pointer(const struct prog_src_register *source,
                         const struct gl_program_machine *machine)
{
   const struct gl_program *prog = machine->CurProgram;
   GLint reg = source->Index;

   if (source->RelAddr) {
      /* add address register value to src index/offset */
      reg += machine->AddressReg[0][0];
      if (reg < 0) {
         return ZeroVec;
      }
   }

   switch (source->File) {
   case PROGRAM_TEMPORARY:
      if (reg >= MAX_PROGRAM_TEMPS)
         return ZeroVec;
      return machine->Temporaries[reg];

   case PROGRAM_INPUT:
      /* Vertex and fragment programs keep their inputs in different arrays. */
      if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
         if (reg >= VERT_ATTRIB_MAX)
            return ZeroVec;
         return machine->VertAttribs[reg];
      }
      else {
         if (reg >= FRAG_ATTRIB_MAX)
            return ZeroVec;
         return machine->Attribs[reg][machine->CurElement];
      }

   case PROGRAM_OUTPUT:
      if (reg >= MAX_PROGRAM_OUTPUTS)
         return ZeroVec;
      return machine->Outputs[reg];

   case PROGRAM_LOCAL_PARAM:
      if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
         return ZeroVec;
      return machine->CurProgram->LocalParams[reg];

   case PROGRAM_ENV_PARAM:
      if (reg >= MAX_PROGRAM_ENV_PARAMS)
         return ZeroVec;
      return machine->EnvParams[reg];

   case PROGRAM_STATE_VAR:
      /* Fallthrough */
   case PROGRAM_CONSTANT:
      /* Fallthrough */
   case PROGRAM_UNIFORM:
      /* Fallthrough */
   case PROGRAM_NAMED_PARAM:
      /* All four files are read from the program's parameter list. */
      if (reg >= (GLint) prog->Parameters->NumParameters)
         return ZeroVec;
      return prog->Parameters->ParameterValues[reg];

   default:
      _mesa_problem(NULL,
         "Invalid src register file %d in get_src_register_pointer()",
         source->File);
      return NULL;
   }
}


/**
 * Return a pointer to the 4-element float vector specified by the given
 * destination register.
 *
 * Out-of-range indices and PROGRAM_WRITE_ONLY destinations resolve to a
 * static scratch register, so such writes are discarded.  An unknown
 * register file reports a problem and returns NULL.
 */
static INLINE GLfloat *
get_dst_register_pointer(const struct prog_dst_register *dest,
                         struct gl_program_machine *machine)
{
   static GLfloat dummyReg[4];
   GLint reg = dest->Index;

   if (dest->RelAddr) {
      /* add address register value to dst index/offset */
      reg += machine->AddressReg[0][0];
      if (reg < 0) {
         return dummyReg;
      }
   }

   switch (dest->File) {
   case PROGRAM_TEMPORARY:
      if (reg >= MAX_PROGRAM_TEMPS)
         return dummyReg;
      return machine->Temporaries[reg];

   case PROGRAM_OUTPUT:
      if (reg >= MAX_PROGRAM_OUTPUTS)
         return dummyReg;
      return machine->Outputs[reg];

   case PROGRAM_WRITE_ONLY:
      return dummyReg;

   default:
      _mesa_problem(NULL,
         "Invalid dest register file %d in get_dst_register_pointer()",
         dest->File);
      return NULL;
   }
}



/**
 * Fetch a 4-element float vector from the given source register.
 * Apply swizzling and negating as needed.
+ */ + static void + fetch_vector4(const struct prog_src_register *source, + const struct gl_program_machine *machine, GLfloat result[4]) + { + const GLfloat *src = get_src_register_pointer(source, machine); + ASSERT(src); + + if (source->Swizzle == SWIZZLE_NOOP) { + /* no swizzling */ + COPY_4V(result, src); + } + else { + ASSERT(GET_SWZ(source->Swizzle, 0) <= 3); + ASSERT(GET_SWZ(source->Swizzle, 1) <= 3); + ASSERT(GET_SWZ(source->Swizzle, 2) <= 3); + ASSERT(GET_SWZ(source->Swizzle, 3) <= 3); + result[0] = src[GET_SWZ(source->Swizzle, 0)]; + result[1] = src[GET_SWZ(source->Swizzle, 1)]; + result[2] = src[GET_SWZ(source->Swizzle, 2)]; + result[3] = src[GET_SWZ(source->Swizzle, 3)]; + } + + if (source->Abs) { + result[0] = FABSF(result[0]); + result[1] = FABSF(result[1]); + result[2] = FABSF(result[2]); + result[3] = FABSF(result[3]); + } + if (source->Negate) { + ASSERT(source->Negate == NEGATE_XYZW); + result[0] = -result[0]; + result[1] = -result[1]; + result[2] = -result[2]; + result[3] = -result[3]; + } + + #ifdef NAN_CHECK + assert(!IS_INF_OR_NAN(result[0])); + assert(!IS_INF_OR_NAN(result[0])); + assert(!IS_INF_OR_NAN(result[0])); + assert(!IS_INF_OR_NAN(result[0])); + #endif + } + + + /** + * Fetch a 4-element uint vector from the given source register. + * Apply swizzling but not negation/abs. 
 */
static void
fetch_vector4ui(const struct prog_src_register *source,
                const struct gl_program_machine *machine, GLuint result[4])
{
   /* The register storage is reinterpreted as unsigned ints. */
   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
   ASSERT(src);

   if (source->Swizzle == SWIZZLE_NOOP) {
      /* no swizzling */
      COPY_4V(result, src);
   }
   else {
      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
      result[0] = src[GET_SWZ(source->Swizzle, 0)];
      result[1] = src[GET_SWZ(source->Swizzle, 1)];
      result[2] = src[GET_SWZ(source->Swizzle, 2)];
      result[3] = src[GET_SWZ(source->Swizzle, 3)];
   }

   /* Note: no Negate or Abs here */
}



/**
 * Fetch the derivative with respect to X or Y for the given register.
 * XXX this currently only works for fragment program input attribs.
 *
 * \param xOrY  'X' selects machine->DerivX; any other value selects DerivY
 *
 * The result is all zeros unless the source is a PROGRAM_INPUT whose index
 * is below machine->NumDeriv.
 */
static void
fetch_vector4_deriv(GLcontext * ctx,
                    const struct prog_src_register *source,
                    const struct gl_program_machine *machine,
                    char xOrY, GLfloat result[4])
{
   if (source->File == PROGRAM_INPUT &&
       source->Index < (GLint) machine->NumDeriv) {
      const GLint col = machine->CurElement;
      const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
      /* Each stored derivative is scaled by 1/w of the window position. */
      const GLfloat invQ = 1.0f / w;
      GLfloat deriv[4];

      if (xOrY == 'X') {
         deriv[0] = machine->DerivX[source->Index][0] * invQ;
         deriv[1] = machine->DerivX[source->Index][1] * invQ;
         deriv[2] = machine->DerivX[source->Index][2] * invQ;
         deriv[3] = machine->DerivX[source->Index][3] * invQ;
      }
      else {
         deriv[0] = machine->DerivY[source->Index][0] * invQ;
         deriv[1] = machine->DerivY[source->Index][1] * invQ;
         deriv[2] = machine->DerivY[source->Index][2] * invQ;
         deriv[3] = machine->DerivY[source->Index][3] * invQ;
      }

      result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
      result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
      result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
      result[3] = deriv[GET_SWZ(source->Swizzle, 3)];

      if (source->Abs) {
         result[0] = FABSF(result[0]);
         result[1] = FABSF(result[1]);
         result[2] = FABSF(result[2]);
         result[3] = FABSF(result[3]);
      }
      if (source->Negate) {
         ASSERT(source->Negate == NEGATE_XYZW);
         result[0] = -result[0];
         result[1] = -result[1];
         result[2] = -result[2];
         result[3] = -result[3];
      }
   }
   else {
      ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
   }
}


/**
 * As above, but only return result[0] element.
 */
static void
fetch_vector1(const struct prog_src_register *source,
              const struct gl_program_machine *machine, GLfloat result[4])
{
   const GLfloat *src = get_src_register_pointer(source, machine);
   ASSERT(src);

   /* Only the first swizzle selector is used. */
   result[0] = src[GET_SWZ(source->Swizzle, 0)];

   if (source->Abs) {
      result[0] = FABSF(result[0]);
   }
   if (source->Negate) {
      result[0] = -result[0];
   }
}


/**
 * Fetch one unsigned component (first swizzle selector) from the given
 * source register.  No abs/negate is applied, and unlike the other fetch
 * helpers the register pointer is not asserted non-NULL.
 */
static GLuint
fetch_vector1ui(const struct prog_src_register *source,
                const struct gl_program_machine *machine)
{
   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
   return src[GET_SWZ(source->Swizzle, 0)];
}


/**
 * Fetch texel from texture.  Use partial derivatives when possible.
 */
static INLINE void
fetch_texel(GLcontext *ctx,
            const struct gl_program_machine *machine,
            const struct prog_instruction *inst,
            const GLfloat texcoord[4], GLfloat lodBias,
            GLfloat color[4])
{
   /* Map the instruction's sampler number through machine->Samplers. */
   const GLuint unit = machine->Samplers[inst->TexSrcUnit];

   /* Note: we only have the right derivatives for fragment input attribs.
    */
   if (machine->NumDeriv > 0 &&
       inst->SrcReg[0].File == PROGRAM_INPUT &&
       inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
      /* simple texture fetch for which we should have derivatives */
      GLuint attr = inst->SrcReg[0].Index;
      machine->FetchTexelDeriv(ctx, texcoord,
                               machine->DerivX[attr],
                               machine->DerivY[attr],
                               lodBias, unit, color);
   }
   else {
      machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
   }
}


/**
 * Test value against zero and return GT, LT, EQ or UN if NaN.
 */
static INLINE GLuint
generate_cc(float value)
{
   /* A NaN is the only value that compares unequal to itself. */
   if (value != value)
      return COND_UN;             /* NaN */
   if (value > 0.0F)
      return COND_GT;
   if (value < 0.0F)
      return COND_LT;
   return COND_EQ;
}


/**
 * Test if the ccMaskRule is satisfied by the given condition code.
 * Used to mask destination writes according to the current condition code.
 * An unrecognized rule is treated as always satisfied.
 */
static INLINE GLboolean
test_cc(GLuint condCode, GLuint ccMaskRule)
{
   switch (ccMaskRule) {
   case COND_EQ: return (condCode == COND_EQ);
   case COND_NE: return (condCode != COND_EQ);
   case COND_LT: return (condCode == COND_LT);
   case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
   case COND_GT: return (condCode == COND_GT);
   case COND_TR: return GL_TRUE;
   case COND_FL: return GL_FALSE;
   default:      return GL_TRUE;
   }
}


/**
 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
 * or GL_FALSE to indicate result.
+ */ + static INLINE GLboolean + eval_condition(const struct gl_program_machine *machine, + const struct prog_instruction *inst) + { + const GLuint swizzle = inst->DstReg.CondSwizzle; + const GLuint condMask = inst->DstReg.CondMask; + if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) || + test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) || + test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) || + test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) { + return GL_TRUE; + } + else { + return GL_FALSE; + } + } + + + + /** + * Store 4 floats into a register. Observe the instructions saturate and + * set-condition-code flags. + */ + static void + store_vector4(const struct prog_instruction *inst, + struct gl_program_machine *machine, const GLfloat value[4]) + { + const struct prog_dst_register *dstReg = &(inst->DstReg); + const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE; + GLuint writeMask = dstReg->WriteMask; + GLfloat clampedValue[4]; + GLfloat *dst = get_dst_register_pointer(dstReg, machine); + + #if 0 + if (value[0] > 1.0e10 || + IS_INF_OR_NAN(value[0]) || + IS_INF_OR_NAN(value[1]) || + IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3])) + printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]); + #endif + + if (clamp) { + clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F); + clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F); + clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F); + clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F); + value = clampedValue; + } + + if (dstReg->CondMask != COND_TR) { + /* condition codes may turn off some writes */ + if (writeMask & WRITEMASK_X) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_X; + } + if (writeMask & WRITEMASK_Y) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_Y; + } + if (writeMask & WRITEMASK_Z) { + if 
(!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_Z; + } + if (writeMask & WRITEMASK_W) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_W; + } + } + + #ifdef NAN_CHECK + assert(!IS_INF_OR_NAN(value[0])); + assert(!IS_INF_OR_NAN(value[0])); + assert(!IS_INF_OR_NAN(value[0])); + assert(!IS_INF_OR_NAN(value[0])); + #endif + + if (writeMask & WRITEMASK_X) + dst[0] = value[0]; + if (writeMask & WRITEMASK_Y) + dst[1] = value[1]; + if (writeMask & WRITEMASK_Z) + dst[2] = value[2]; + if (writeMask & WRITEMASK_W) + dst[3] = value[3]; + + if (inst->CondUpdate) { + if (writeMask & WRITEMASK_X) + machine->CondCodes[0] = generate_cc(value[0]); + if (writeMask & WRITEMASK_Y) + machine->CondCodes[1] = generate_cc(value[1]); + if (writeMask & WRITEMASK_Z) + machine->CondCodes[2] = generate_cc(value[2]); + if (writeMask & WRITEMASK_W) + machine->CondCodes[3] = generate_cc(value[3]); + #if DEBUG_PROG + printf("CondCodes=(%s,%s,%s,%s) for:\n", + _mesa_condcode_string(machine->CondCodes[0]), + _mesa_condcode_string(machine->CondCodes[1]), + _mesa_condcode_string(machine->CondCodes[2]), + _mesa_condcode_string(machine->CondCodes[3])); + #endif + } + } + + + /** + * Store 4 uints into a register. Observe the set-condition-code flags. 
+ */ + static void + store_vector4ui(const struct prog_instruction *inst, + struct gl_program_machine *machine, const GLuint value[4]) + { + const struct prog_dst_register *dstReg = &(inst->DstReg); + GLuint writeMask = dstReg->WriteMask; + GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine); + + if (dstReg->CondMask != COND_TR) { + /* condition codes may turn off some writes */ + if (writeMask & WRITEMASK_X) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_X; + } + if (writeMask & WRITEMASK_Y) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_Y; + } + if (writeMask & WRITEMASK_Z) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_Z; + } + if (writeMask & WRITEMASK_W) { + if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], + dstReg->CondMask)) + writeMask &= ~WRITEMASK_W; + } + } + + if (writeMask & WRITEMASK_X) + dst[0] = value[0]; + if (writeMask & WRITEMASK_Y) + dst[1] = value[1]; + if (writeMask & WRITEMASK_Z) + dst[2] = value[2]; + if (writeMask & WRITEMASK_W) + dst[3] = value[3]; + + if (inst->CondUpdate) { + if (writeMask & WRITEMASK_X) + machine->CondCodes[0] = generate_cc((float)value[0]); + if (writeMask & WRITEMASK_Y) + machine->CondCodes[1] = generate_cc((float)value[1]); + if (writeMask & WRITEMASK_Z) + machine->CondCodes[2] = generate_cc((float)value[2]); + if (writeMask & WRITEMASK_W) + machine->CondCodes[3] = generate_cc((float)value[3]); + #if DEBUG_PROG + printf("CondCodes=(%s,%s,%s,%s) for:\n", + _mesa_condcode_string(machine->CondCodes[0]), + _mesa_condcode_string(machine->CondCodes[1]), + _mesa_condcode_string(machine->CondCodes[2]), + _mesa_condcode_string(machine->CondCodes[3])); + #endif + } + } + + + + /** + * Execute the given vertex/fragment program. 
+ * + * \param ctx rendering context + * \param program the program to execute + * \param machine machine state (must be initialized) + * \return GL_TRUE if program completed or GL_FALSE if program executed KIL. + */ + GLboolean + _mesa_execute_program(GLcontext * ctx, + const struct gl_program *program, + struct gl_program_machine *machine) + { + const GLuint numInst = program->NumInstructions; + const GLuint maxExec = 10000; + GLuint pc, numExec = 0; + + machine->CurProgram = program; + + if (DEBUG_PROG) { + printf("execute program %u --------------------\n", program->Id); + } + + if (program->Target == GL_VERTEX_PROGRAM_ARB) { + machine->EnvParams = ctx->VertexProgram.Parameters; + } + else { + machine->EnvParams = ctx->FragmentProgram.Parameters; + } + + for (pc = 0; pc < numInst; pc++) { + const struct prog_instruction *inst = program->Instructions + pc; + + if (DEBUG_PROG) { + _mesa_print_instruction(inst); + } + + switch (inst->Opcode) { + case OPCODE_ABS: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = FABSF(a[0]); + result[1] = FABSF(a[1]); + result[2] = FABSF(a[2]); + result[3] = FABSF(a[3]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_ADD: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_AND: /* bitwise AND */ + { + GLuint a[4], b[4], result[4]; + fetch_vector4ui(&inst->SrcReg[0], machine, a); + fetch_vector4ui(&inst->SrcReg[1], machine, b); + result[0] = a[0] & b[0]; + result[1] = a[1] & b[1]; + result[2] = a[2] & b[2]; + result[3] = a[3] & b[3]; + 
store_vector4ui(inst, machine, result); + } + break; + case OPCODE_ARL: + { + GLfloat t[4]; + fetch_vector4(&inst->SrcReg[0], machine, t); + machine->AddressReg[0][0] = IFLOOR(t[0]); + if (DEBUG_PROG) { + printf("ARL %d\n", machine->AddressReg[0][0]); + } + } + break; + case OPCODE_BGNLOOP: + /* no-op */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDLOOP); + break; + case OPCODE_ENDLOOP: + /* subtract 1 here since pc is incremented by for(pc) loop */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_BGNLOOP); + pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */ + break; + case OPCODE_BGNSUB: /* begin subroutine */ + break; + case OPCODE_ENDSUB: /* end subroutine */ + break; + case OPCODE_BRA: /* branch (conditional) */ + if (eval_condition(machine, inst)) { + /* take branch */ + /* Subtract 1 here since we'll do pc++ below */ + pc = inst->BranchTarget - 1; + } + break; + case OPCODE_BRK: /* break out of loop (conditional) */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDLOOP); + if (eval_condition(machine, inst)) { + /* break out of loop */ + /* pc++ at end of for-loop will put us after the ENDLOOP inst */ + pc = inst->BranchTarget; + } + break; + case OPCODE_CONT: /* continue loop (conditional) */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDLOOP); + if (eval_condition(machine, inst)) { + /* continue at ENDLOOP */ + /* Subtract 1 here since we'll do pc++ at end of for-loop */ + pc = inst->BranchTarget - 1; + } + break; + case OPCODE_CAL: /* Call subroutine (conditional) */ + if (eval_condition(machine, inst)) { + /* call the subroutine */ + if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { + return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ + } + machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ + /* Subtract 1 here since we'll do pc++ at end of for-loop */ + pc = inst->BranchTarget - 1; + } + break; + case OPCODE_CMP: + { + GLfloat 
a[4], b[4], c[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + fetch_vector4(&inst->SrcReg[2], machine, c); + result[0] = a[0] < 0.0F ? b[0] : c[0]; + result[1] = a[1] < 0.0F ? b[1] : c[1]; + result[2] = a[2] < 0.0F ? b[2] : c[2]; + result[3] = a[3] < 0.0F ? b[3] : c[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_COS: + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + result[0] = result[1] = result[2] = result[3] + = (GLfloat) cos(a[0]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_DDX: /* Partial derivative with respect to X */ + { + GLfloat result[4]; + fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, + 'X', result); + store_vector4(inst, machine, result); + } + break; + case OPCODE_DDY: /* Partial derivative with respect to Y */ + { + GLfloat result[4]; + fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, + 'Y', result); + store_vector4(inst, machine, result); + } + break; + case OPCODE_DP2: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = result[1] = result[2] = result[3] = DOT2(a, b); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("DP2 %g = (%g %g) . (%g %g)\n", + result[0], a[0], a[1], b[0], b[1]); + } + } + break; + case OPCODE_DP2A: + { + GLfloat a[4], b[4], c, result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + fetch_vector1(&inst->SrcReg[1], machine, &c); + result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("DP2A %g = (%g %g) . 
(%g %g) + %g\n", + result[0], a[0], a[1], b[0], b[1], c); + } + } + break; + case OPCODE_DP3: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = result[1] = result[2] = result[3] = DOT3(a, b); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("DP3 %g = (%g %g %g) . (%g %g %g)\n", + result[0], a[0], a[1], a[2], b[0], b[1], b[2]); + } + } + break; + case OPCODE_DP4: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = result[1] = result[2] = result[3] = DOT4(a, b); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n", + result[0], a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_DPH: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_DST: /* Distance vector */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = 1.0F; + result[1] = a[1] * b[1]; + result[2] = a[2]; + result[3] = b[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_EXP: + { + GLfloat t[4], q[4], floor_t0; + fetch_vector1(&inst->SrcReg[0], machine, t); + floor_t0 = FLOORF(t[0]); + if (floor_t0 > FLT_MAX_EXP) { + SET_POS_INFINITY(q[0]); + SET_POS_INFINITY(q[2]); + } + else if (floor_t0 < FLT_MIN_EXP) { + q[0] = 0.0F; + q[2] = 0.0F; + } + else { + q[0] = LDEXPF(1.0, (int) floor_t0); + /* Note: GL_NV_vertex_program expects + * result.z = result.x * APPX(result.y) + * We do what the ARB extension says. 
+ */ + q[2] = (GLfloat) pow(2.0, t[0]); + } + q[1] = t[0] - floor_t0; + q[3] = 1.0F; + store_vector4( inst, machine, q ); + } + break; + case OPCODE_EX2: /* Exponential base 2 */ + { + GLfloat a[4], result[4], val; + fetch_vector1(&inst->SrcReg[0], machine, a); + val = (GLfloat) pow(2.0, a[0]); + /* + if (IS_INF_OR_NAN(val)) + val = 1.0e10; + */ + result[0] = result[1] = result[2] = result[3] = val; + store_vector4(inst, machine, result); + } + break; + case OPCODE_FLR: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = FLOORF(a[0]); + result[1] = FLOORF(a[1]); + result[2] = FLOORF(a[2]); + result[3] = FLOORF(a[3]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_FRC: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = a[0] - FLOORF(a[0]); + result[1] = a[1] - FLOORF(a[1]); + result[2] = a[2] - FLOORF(a[2]); + result[3] = a[3] - FLOORF(a[3]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_IF: + { + GLboolean cond; + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ELSE || + program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDIF); + /* eval condition */ + if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { + GLfloat a[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + cond = (a[0] != 0.0); + } + else { + cond = eval_condition(machine, inst); + } + if (DEBUG_PROG) { + printf("IF: %d\n", cond); + } + /* do if/else */ + if (cond) { + /* do if-clause (just continue execution) */ + } + else { + /* go to the instruction after ELSE or ENDIF */ + assert(inst->BranchTarget >= 0); + pc = inst->BranchTarget; + } + } + break; + case OPCODE_ELSE: + /* goto ENDIF */ + ASSERT(program->Instructions[inst->BranchTarget].Opcode + == OPCODE_ENDIF); + assert(inst->BranchTarget >= 0); + pc = inst->BranchTarget; + break; + case OPCODE_ENDIF: + /* nothing */ + break; + case OPCODE_KIL_NV: /* NV_f_p only (conditional) */ + if 
(eval_condition(machine, inst)) { + return GL_FALSE; + } + break; + case OPCODE_KIL: /* ARB_f_p only */ + { + GLfloat a[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + if (DEBUG_PROG) { + printf("KIL if (%g %g %g %g) <= 0.0\n", + a[0], a[1], a[2], a[3]); + } + + if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) { + return GL_FALSE; + } + } + break; + case OPCODE_LG2: /* log base 2 */ + { + GLfloat a[4], result[4], val; + fetch_vector1(&inst->SrcReg[0], machine, a); + /* The fast LOG2 macro doesn't meet the precision requirements. + */ + if (a[0] == 0.0F) { + val = -FLT_MAX; + } + else { + val = (float)(log(a[0]) * 1.442695F); + } + result[0] = result[1] = result[2] = result[3] = val; + store_vector4(inst, machine, result); + } + break; + case OPCODE_LIT: + { + const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */ + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + a[0] = MAX2(a[0], 0.0F); + a[1] = MAX2(a[1], 0.0F); + /* XXX ARB version clamps a[3], NV version doesn't */ + a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = a[0]; + /* XXX we could probably just use pow() here */ + if (a[0] > 0.0F) { + if (a[1] == 0.0 && a[3] == 0.0) + result[2] = 1.0F; + else + result[2] = (GLfloat) pow(a[1], a[3]); + } + else { + result[2] = 0.0F; + } + result[3] = 1.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("LIT (%g %g %g %g) : (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3]); + } + } + break; + case OPCODE_LOG: + { + GLfloat t[4], q[4], abs_t0; + fetch_vector1(&inst->SrcReg[0], machine, t); + abs_t0 = FABSF(t[0]); + if (abs_t0 != 0.0F) { + /* Since we really can't handle infinite values on VMS + * like other OSes we'll use __MAXFLOAT to represent + * infinity. This may need some tweaking. 
+ */ + #ifdef VMS + if (abs_t0 == __MAXFLOAT) + #else + if (IS_INF_OR_NAN(abs_t0)) + #endif + { + SET_POS_INFINITY(q[0]); + q[1] = 1.0F; + SET_POS_INFINITY(q[2]); + } + else { + int exponent; + GLfloat mantissa = FREXPF(t[0], &exponent); + q[0] = (GLfloat) (exponent - 1); + q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ + + /* The fast LOG2 macro doesn't meet the precision + * requirements. + */ + q[2] = (float)(log(t[0]) * 1.442695F); + } + } + else { + SET_NEG_INFINITY(q[0]); + q[1] = 1.0F; + SET_NEG_INFINITY(q[2]); + } + q[3] = 1.0; + store_vector4(inst, machine, q); + } + break; + case OPCODE_LRP: + { + GLfloat a[4], b[4], c[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + fetch_vector4(&inst->SrcReg[2], machine, c); + result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0]; + result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1]; + result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2]; + result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3]; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("LRP (%g %g %g %g) = (%g %g %g %g), " + "(%g %g %g %g), (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); + } + } + break; + case OPCODE_MAD: + { + GLfloat a[4], b[4], c[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + fetch_vector4(&inst->SrcReg[2], machine, c); + result[0] = a[0] * b[0] + c[0]; + result[1] = a[1] * b[1] + c[1]; + result[2] = a[2] * b[2] + c[2]; + result[3] = a[3] * b[3] + c[3]; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("MAD (%g %g %g %g) = (%g %g %g %g) * " + "(%g %g %g %g) + (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); + } + } + break; + case OPCODE_MAX: + { + GLfloat a[4], b[4], result[4]; + 
fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = MAX2(a[0], b[0]); + result[1] = MAX2(a[1], b[1]); + result[2] = MAX2(a[2], b[2]); + result[3] = MAX2(a[3], b[3]); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_MIN: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = MIN2(a[0], b[0]); + result[1] = MIN2(a[1], b[1]); + result[2] = MIN2(a[2], b[2]); + result[3] = MIN2(a[3], b[3]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_MOV: + { + GLfloat result[4]; + fetch_vector4(&inst->SrcReg[0], machine, result); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("MOV (%g %g %g %g)\n", + result[0], result[1], result[2], result[3]); + } + } + break; + case OPCODE_MUL: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_NOISE1: + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + result[0] = + result[1] = + result[2] = + result[3] = _mesa_noise1(a[0]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_NOISE2: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = + result[1] = + result[2] = result[3] = _mesa_noise2(a[0], a[1]); + store_vector4(inst, machine, result); + } + break; + 
case OPCODE_NOISE3: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = + result[1] = + result[2] = + result[3] = _mesa_noise3(a[0], a[1], a[2]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_NOISE4: + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = + result[1] = + result[2] = + result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_NOP: + break; + case OPCODE_NOT: /* bitwise NOT */ + { + GLuint a[4], result[4]; + fetch_vector4ui(&inst->SrcReg[0], machine, a); + result[0] = ~a[0]; + result[1] = ~a[1]; + result[2] = ~a[2]; + result[3] = ~a[3]; + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_NRM3: /* 3-component normalization */ + { + GLfloat a[4], result[4]; + GLfloat tmp; + fetch_vector4(&inst->SrcReg[0], machine, a); + tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2]; + if (tmp != 0.0F) + tmp = INV_SQRTF(tmp); + result[0] = tmp * a[0]; + result[1] = tmp * a[1]; + result[2] = tmp * a[2]; + result[3] = 0.0; /* undefined, but prevent valgrind warnings */ + store_vector4(inst, machine, result); + } + break; + case OPCODE_NRM4: /* 4-component normalization */ + { + GLfloat a[4], result[4]; + GLfloat tmp; + fetch_vector4(&inst->SrcReg[0], machine, a); + tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3]; + if (tmp != 0.0F) + tmp = INV_SQRTF(tmp); + result[0] = tmp * a[0]; + result[1] = tmp * a[1]; + result[2] = tmp * a[2]; + result[3] = tmp * a[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_OR: /* bitwise OR */ + { + GLuint a[4], b[4], result[4]; + fetch_vector4ui(&inst->SrcReg[0], machine, a); + fetch_vector4ui(&inst->SrcReg[1], machine, b); + result[0] = a[0] | b[0]; + result[1] = a[1] | b[1]; + result[2] = a[2] | b[2]; + result[3] = a[3] | b[3]; + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_PK2H: /* pack two 16-bit floats in one 
32-bit float */ + { + GLfloat a[4]; + GLuint result[4]; + GLhalfNV hx, hy; + fetch_vector4(&inst->SrcReg[0], machine, a); + hx = _mesa_float_to_half(a[0]); + hy = _mesa_float_to_half(a[1]); + result[0] = + result[1] = + result[2] = + result[3] = hx | (hy << 16); + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */ + { + GLfloat a[4]; + GLuint result[4], usx, usy; + fetch_vector4(&inst->SrcReg[0], machine, a); + a[0] = CLAMP(a[0], 0.0F, 1.0F); + a[1] = CLAMP(a[1], 0.0F, 1.0F); + usx = IROUND(a[0] * 65535.0F); + usy = IROUND(a[1] * 65535.0F); + result[0] = + result[1] = + result[2] = + result[3] = usx | (usy << 16); + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */ + { + GLfloat a[4]; + GLuint result[4], ubx, uby, ubz, ubw; + fetch_vector4(&inst->SrcReg[0], machine, a); + a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F); + a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F); + a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F); + a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F); + ubx = IROUND(127.0F * a[0] + 128.0F); + uby = IROUND(127.0F * a[1] + 128.0F); + ubz = IROUND(127.0F * a[2] + 128.0F); + ubw = IROUND(127.0F * a[3] + 128.0F); + result[0] = + result[1] = + result[2] = + result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */ + { + GLfloat a[4]; + GLuint result[4], ubx, uby, ubz, ubw; + fetch_vector4(&inst->SrcReg[0], machine, a); + a[0] = CLAMP(a[0], 0.0F, 1.0F); + a[1] = CLAMP(a[1], 0.0F, 1.0F); + a[2] = CLAMP(a[2], 0.0F, 1.0F); + a[3] = CLAMP(a[3], 0.0F, 1.0F); + ubx = IROUND(255.0F * a[0]); + uby = IROUND(255.0F * a[1]); + ubz = IROUND(255.0F * a[2]); + ubw = IROUND(255.0F * a[3]); + result[0] = + result[1] = + result[2] = + result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); + store_vector4ui(inst, machine, result); 
+ } + break; + case OPCODE_POW: + { + GLfloat a[4], b[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + fetch_vector1(&inst->SrcReg[1], machine, b); + result[0] = result[1] = result[2] = result[3] + = (GLfloat) pow(a[0], b[0]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_RCP: + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + if (DEBUG_PROG) { + if (a[0] == 0) + printf("RCP(0)\n"); + else if (IS_INF_OR_NAN(a[0])) + printf("RCP(inf)\n"); + } + result[0] = result[1] = result[2] = result[3] = 1.0F / a[0]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_RET: /* return from subroutine (conditional) */ + if (eval_condition(machine, inst)) { + if (machine->StackDepth == 0) { + return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ + } + /* subtract one because of pc++ in the for loop */ + pc = machine->CallStack[--machine->StackDepth] - 1; + } + break; + case OPCODE_RFL: /* reflection vector */ + { + GLfloat axis[4], dir[4], result[4], tmpX, tmpW; + fetch_vector4(&inst->SrcReg[0], machine, axis); + fetch_vector4(&inst->SrcReg[1], machine, dir); + tmpW = DOT3(axis, axis); + tmpX = (2.0F * DOT3(axis, dir)) / tmpW; + result[0] = tmpX * axis[0] - dir[0]; + result[1] = tmpX * axis[1] - dir[1]; + result[2] = tmpX * axis[2] - dir[2]; + /* result[3] is never written! XXX enforce in parser! */ + store_vector4(inst, machine, result); + } + break; + case OPCODE_RSQ: /* 1 / sqrt() */ + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + a[0] = FABSF(a[0]); + result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]); + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]); + } + } + break; + case OPCODE_SCS: /* sine and cos */ + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + result[0] = (GLfloat) cos(a[0]); + result[1] = (GLfloat) sin(a[0]); + result[2] = 0.0; /* undefined! 
*/ + result[3] = 0.0; /* undefined! */ + store_vector4(inst, machine, result); + } + break; + case OPCODE_SEQ: /* set on equal */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] == b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] == b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] == b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] == b[3]) ? 1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SFL: /* set false, operands ignored */ + { + static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; + store_vector4(inst, machine, result); + } + break; + case OPCODE_SGE: /* set on greater or equal */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SGT: /* set on greater */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] > b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] > b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] > b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] > b[3]) ? 
1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SIN: + { + GLfloat a[4], result[4]; + fetch_vector1(&inst->SrcReg[0], machine, a); + result[0] = result[1] = result[2] = result[3] + = (GLfloat) sin(a[0]); + store_vector4(inst, machine, result); + } + break; + case OPCODE_SLE: /* set on less or equal */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SLT: /* set on less */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] < b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] < b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] < b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] < b[3]) ? 1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SNE: /* set on not equal */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = (a[0] != b[0]) ? 1.0F : 0.0F; + result[1] = (a[1] != b[1]) ? 1.0F : 0.0F; + result[2] = (a[2] != b[2]) ? 1.0F : 0.0F; + result[3] = (a[3] != b[3]) ? 
1.0F : 0.0F; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], + b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SSG: /* set sign (-1, 0 or +1) */ + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F)); + result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F)); + result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F)); + result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F)); + store_vector4(inst, machine, result); + } + break; + case OPCODE_STR: /* set true, operands ignored */ + { + static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F }; + store_vector4(inst, machine, result); + } + break; + case OPCODE_SUB: + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = a[0] - b[0]; + result[1] = a[1] - b[1]; + result[2] = a[2] - b[2]; + result[3] = a[3] - b[3]; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); + } + } + break; + case OPCODE_SWZ: /* extended swizzle */ + { + const struct prog_src_register *source = &inst->SrcReg[0]; + const GLfloat *src = get_src_register_pointer(source, machine); + GLfloat result[4]; + GLuint i; + for (i = 0; i < 4; i++) { + const GLuint swz = GET_SWZ(source->Swizzle, i); + if (swz == SWIZZLE_ZERO) + result[i] = 0.0; + else if (swz == SWIZZLE_ONE) + result[i] = 1.0; + else { + ASSERT(swz >= 0); + ASSERT(swz <= 3); + result[i] = src[swz]; + } + if (source->Negate & (1 << i)) + result[i] = -result[i]; + } + store_vector4(inst, machine, result); + } + break; + case OPCODE_TEX: /* Both ARB and NV frag prog */ + /* Simple texel lookup */ + { + GLfloat 
texcoord[4], color[4]; + fetch_vector4(&inst->SrcReg[0], machine, texcoord); + + fetch_texel(ctx, machine, inst, texcoord, 0.0, color); + + if (DEBUG_PROG) { + printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n", + color[0], color[1], color[2], color[3], + inst->TexSrcUnit, + texcoord[0], texcoord[1], texcoord[2], texcoord[3]); + } + store_vector4(inst, machine, color); + } + break; + case OPCODE_TXB: /* GL_ARB_fragment_program only */ + /* Texel lookup with LOD bias */ + { + GLfloat texcoord[4], color[4], lodBias; + + fetch_vector4(&inst->SrcReg[0], machine, texcoord); + + /* texcoord[3] is the bias to add to lambda */ + lodBias = texcoord[3]; + + fetch_texel(ctx, machine, inst, texcoord, lodBias, color); + + store_vector4(inst, machine, color); + } + break; + case OPCODE_TXD: /* GL_NV_fragment_program only */ + /* Texture lookup w/ partial derivatives for LOD */ + { + GLfloat texcoord[4], dtdx[4], dtdy[4], color[4]; + fetch_vector4(&inst->SrcReg[0], machine, texcoord); + fetch_vector4(&inst->SrcReg[1], machine, dtdx); + fetch_vector4(&inst->SrcReg[2], machine, dtdy); + machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy, + 0.0, /* lodBias */ + inst->TexSrcUnit, color); + store_vector4(inst, machine, color); + } + break; + case OPCODE_TXP: /* GL_ARB_fragment_program only */ + /* Texture lookup w/ projective divide */ + { + GLfloat texcoord[4], color[4]; + + fetch_vector4(&inst->SrcReg[0], machine, texcoord); + /* Not so sure about this test - if texcoord[3] is + * zero, we'd probably be fine except for an ASSERT in + * IROUND_POS() which gets triggered by the inf values created. 
+ */ + if (texcoord[3] != 0.0) { + texcoord[0] /= texcoord[3]; + texcoord[1] /= texcoord[3]; + texcoord[2] /= texcoord[3]; + } + + fetch_texel(ctx, machine, inst, texcoord, 0.0, color); + + store_vector4(inst, machine, color); + } + break; + case OPCODE_TXP_NV: /* GL_NV_fragment_program only */ + /* Texture lookup w/ projective divide, as above, but do not + * do the divide by w if sampling from a cube map. + */ + { + GLfloat texcoord[4], color[4]; + + fetch_vector4(&inst->SrcReg[0], machine, texcoord); + if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX && + texcoord[3] != 0.0) { + texcoord[0] /= texcoord[3]; + texcoord[1] /= texcoord[3]; + texcoord[2] /= texcoord[3]; + } + + fetch_texel(ctx, machine, inst, texcoord, 0.0, color); + + store_vector4(inst, machine, color); + } + break; + case OPCODE_TRUNC: /* truncate toward zero */ + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = (GLfloat) (GLint) a[0]; + result[1] = (GLfloat) (GLint) a[1]; + result[2] = (GLfloat) (GLint) a[2]; + result[3] = (GLfloat) (GLint) a[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_UP2H: /* unpack two 16-bit floats */ + { + const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); + GLfloat result[4]; + GLushort hx, hy; + hx = raw & 0xffff; + hy = raw >> 16; + result[0] = result[2] = _mesa_half_to_float(hx); + result[1] = result[3] = _mesa_half_to_float(hy); + store_vector4(inst, machine, result); + } + break; + case OPCODE_UP2US: /* unpack two GLushorts */ + { + const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); + GLfloat result[4]; + GLushort usx, usy; + usx = raw & 0xffff; + usy = raw >> 16; + result[0] = result[2] = usx * (1.0f / 65535.0f); + result[1] = result[3] = usy * (1.0f / 65535.0f); + store_vector4(inst, machine, result); + } + break; + case OPCODE_UP4B: /* unpack four GLbytes */ + { + const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); + GLfloat result[4]; + result[0] = (((raw >> 0) & 
0xff) - 128) / 127.0F; + result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F; + result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F; + result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F; + store_vector4(inst, machine, result); + } + break; + case OPCODE_UP4UB: /* unpack four GLubytes */ + { + const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); + GLfloat result[4]; + result[0] = ((raw >> 0) & 0xff) / 255.0F; + result[1] = ((raw >> 8) & 0xff) / 255.0F; + result[2] = ((raw >> 16) & 0xff) / 255.0F; + result[3] = ((raw >> 24) & 0xff) / 255.0F; + store_vector4(inst, machine, result); + } + break; + case OPCODE_XOR: /* bitwise XOR */ + { + GLuint a[4], b[4], result[4]; + fetch_vector4ui(&inst->SrcReg[0], machine, a); + fetch_vector4ui(&inst->SrcReg[1], machine, b); + result[0] = a[0] ^ b[0]; + result[1] = a[1] ^ b[1]; + result[2] = a[2] ^ b[2]; + result[3] = a[3] ^ b[3]; + store_vector4ui(inst, machine, result); + } + break; + case OPCODE_XPD: /* cross product */ + { + GLfloat a[4], b[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + result[0] = a[1] * b[2] - a[2] * b[1]; + result[1] = a[2] * b[0] - a[0] * b[2]; + result[2] = a[0] * b[1] - a[1] * b[0]; + result[3] = 1.0; + store_vector4(inst, machine, result); + if (DEBUG_PROG) { + printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n", + result[0], result[1], result[2], result[3], + a[0], a[1], a[2], b[0], b[1], b[2]); + } + } + break; + case OPCODE_X2D: /* 2-D matrix transform */ + { + GLfloat a[4], b[4], c[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + fetch_vector4(&inst->SrcReg[1], machine, b); + fetch_vector4(&inst->SrcReg[2], machine, c); + result[0] = a[0] + b[0] * c[0] + b[1] * c[1]; + result[1] = a[1] + b[0] * c[2] + b[1] * c[3]; + result[2] = a[2] + b[0] * c[0] + b[1] * c[1]; + result[3] = a[3] + b[0] * c[2] + b[1] * c[3]; + store_vector4(inst, machine, result); + } + break; + case OPCODE_PRINT: + { + if (inst->SrcReg[0].File != 
PROGRAM_UNDEFINED) { + GLfloat a[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + printf("%s%g, %g, %g, %g\n", (const char *) inst->Data, + a[0], a[1], a[2], a[3]); + } + else { + printf("%s\n", (const char *) inst->Data); + } + } + break; + case OPCODE_END: + return GL_TRUE; + default: + _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", + inst->Opcode); + return GL_TRUE; /* return value doesn't matter */ + } + + numExec++; + if (numExec > maxExec) { - _mesa_problem(ctx, "Infinite loop detected in fragment program"); ++ static GLboolean reported = GL_FALSE; ++ if (!reported) { ++ _mesa_problem(ctx, "Infinite loop detected in fragment program"); ++ reported = GL_TRUE; ++ } + return GL_TRUE; + } + + } /* for pc */ + + return GL_TRUE; + } diff --cc src/mesa/program/prog_instruction.h index 00000000000,dacbc33704b..02df2089458 mode 000000,100644..100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@@ -1,0 -1,454 +1,454 @@@ + /* + * Mesa 3-D graphics library + * Version: 7.3 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + + /** + * \file prog_instruction.h + * + * Vertex/fragment program instruction datatypes and constants. + * + * \author Brian Paul + * \author Keith Whitwell + * \author Ian Romanick + */ + + + #ifndef PROG_INSTRUCTION_H + #define PROG_INSTRUCTION_H + + + #include "main/mfeatures.h" + + + /** + * Swizzle indexes. + * Do not change! + */ + /*@{*/ + #define SWIZZLE_X 0 + #define SWIZZLE_Y 1 + #define SWIZZLE_Z 2 + #define SWIZZLE_W 3 + #define SWIZZLE_ZERO 4 /**< For SWZ instruction only */ + #define SWIZZLE_ONE 5 /**< For SWZ instruction only */ + #define SWIZZLE_NIL 7 /**< used during shader code gen (undefined value) */ + /*@}*/ + + #define MAKE_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) + #define SWIZZLE_NOOP MAKE_SWIZZLE4(0,1,2,3) + #define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) + #define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) + + #define SWIZZLE_XYZW MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W) + #define SWIZZLE_XXXX MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X) + #define SWIZZLE_YYYY MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y) + #define SWIZZLE_ZZZZ MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z) + #define SWIZZLE_WWWW MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W) + + + /** + * Writemask values, 1 bit per component. 
+ */ + /*@{*/ + #define WRITEMASK_X 0x1 + #define WRITEMASK_Y 0x2 + #define WRITEMASK_XY 0x3 + #define WRITEMASK_Z 0x4 + #define WRITEMASK_XZ 0x5 + #define WRITEMASK_YZ 0x6 + #define WRITEMASK_XYZ 0x7 + #define WRITEMASK_W 0x8 + #define WRITEMASK_XW 0x9 + #define WRITEMASK_YW 0xa + #define WRITEMASK_XYW 0xb + #define WRITEMASK_ZW 0xc + #define WRITEMASK_XZW 0xd + #define WRITEMASK_YZW 0xe + #define WRITEMASK_XYZW 0xf + /*@}*/ + + + /** + * Condition codes + */ + /*@{*/ + #define COND_GT 1 /**< greater than zero */ + #define COND_EQ 2 /**< equal to zero */ + #define COND_LT 3 /**< less than zero */ + #define COND_UN 4 /**< unordered (NaN) */ + #define COND_GE 5 /**< greater than or equal to zero */ + #define COND_LE 6 /**< less than or equal to zero */ + #define COND_NE 7 /**< not equal to zero */ + #define COND_TR 8 /**< always true */ + #define COND_FL 9 /**< always false */ + /*@}*/ + + + /** + * Instruction precision for GL_NV_fragment_program + */ + /*@{*/ + #define FLOAT32 0x1 + #define FLOAT16 0x2 + #define FIXED12 0x4 + /*@}*/ + + + /** + * Saturation modes when storing values. + */ + /*@{*/ + #define SATURATE_OFF 0 + #define SATURATE_ZERO_ONE 1 + /*@}*/ + + + /** + * Per-component negation masks + */ + /*@{*/ + #define NEGATE_X 0x1 + #define NEGATE_Y 0x2 + #define NEGATE_Z 0x4 + #define NEGATE_W 0x8 + #define NEGATE_XYZ 0x7 + #define NEGATE_XYZW 0xf + #define NEGATE_NONE 0x0 + /*@}*/ + + + /** + * Program instruction opcodes for vertex, fragment and geometry programs. 
+ */ + typedef enum prog_opcode { + /* ARB_vp ARB_fp NV_vp NV_fp GLSL */ + /*------------------------------------------*/ + OPCODE_NOP = 0, /* X */ + OPCODE_ABS, /* X X 1.1 X */ + OPCODE_ADD, /* X X X X X */ + OPCODE_AND, /* */ + OPCODE_ARA, /* 2 */ + OPCODE_ARL, /* X X */ + OPCODE_ARL_NV, /* 2 */ + OPCODE_ARR, /* 2 */ + OPCODE_BGNLOOP, /* opt */ + OPCODE_BGNSUB, /* opt */ + OPCODE_BRA, /* 2 X */ + OPCODE_BRK, /* 2 opt */ + OPCODE_CAL, /* 2 2 */ + OPCODE_CMP, /* X */ + OPCODE_CONT, /* opt */ + OPCODE_COS, /* X 2 X X */ + OPCODE_DDX, /* X X */ + OPCODE_DDY, /* X X */ + OPCODE_DP2, /* 2 */ + OPCODE_DP2A, /* 2 */ + OPCODE_DP3, /* X X X X X */ + OPCODE_DP4, /* X X X X X */ + OPCODE_DPH, /* X X 1.1 */ + OPCODE_DST, /* X X X X */ + OPCODE_ELSE, /* X */ + OPCODE_EMIT_VERTEX,/* X */ + OPCODE_END, /* X X X X opt */ + OPCODE_END_PRIMITIVE,/* X */ + OPCODE_ENDIF, /* opt */ + OPCODE_ENDLOOP, /* opt */ + OPCODE_ENDSUB, /* opt */ + OPCODE_EX2, /* X X 2 X X */ + OPCODE_EXP, /* X X X */ + OPCODE_FLR, /* X X 2 X X */ + OPCODE_FRC, /* X X 2 X X */ + OPCODE_IF, /* opt */ + OPCODE_KIL, /* X */ + OPCODE_KIL_NV, /* X X */ + OPCODE_LG2, /* X X 2 X X */ + OPCODE_LIT, /* X X X X */ + OPCODE_LOG, /* X X X */ + OPCODE_LRP, /* X X */ + OPCODE_MAD, /* X X X X X */ + OPCODE_MAX, /* X X X X X */ + OPCODE_MIN, /* X X X X X */ + OPCODE_MOV, /* X X X X X */ + OPCODE_MUL, /* X X X X X */ + OPCODE_NOISE1, /* X */ + OPCODE_NOISE2, /* X */ + OPCODE_NOISE3, /* X */ + OPCODE_NOISE4, /* X */ + OPCODE_NOT, /* */ + OPCODE_NRM3, /* */ + OPCODE_NRM4, /* */ + OPCODE_OR, /* */ + OPCODE_PK2H, /* X */ + OPCODE_PK2US, /* X */ + OPCODE_PK4B, /* X */ + OPCODE_PK4UB, /* X */ + OPCODE_POW, /* X X X X */ + OPCODE_POPA, /* 3 */ + OPCODE_PRINT, /* X X */ + OPCODE_PUSHA, /* 3 */ + OPCODE_RCC, /* 1.1 */ + OPCODE_RCP, /* X X X X X */ + OPCODE_RET, /* 2 2 */ + OPCODE_RFL, /* X X */ + OPCODE_RSQ, /* X X X X X */ + OPCODE_SCS, /* X */ + OPCODE_SEQ, /* 2 X X */ + OPCODE_SFL, /* 2 X */ + OPCODE_SGE, /* X X X X X */ + OPCODE_SGT, 
/* 2 X X */ + OPCODE_SIN, /* X 2 X X */ + OPCODE_SLE, /* 2 X X */ + OPCODE_SLT, /* X X X X X */ + OPCODE_SNE, /* 2 X X */ + OPCODE_SSG, /* 2 */ + OPCODE_STR, /* 2 X */ + OPCODE_SUB, /* X X 1.1 X X */ + OPCODE_SWZ, /* X X */ + OPCODE_TEX, /* X 3 X X */ + OPCODE_TXB, /* X 3 X */ + OPCODE_TXD, /* X X */ + OPCODE_TXL, /* 3 2 X */ + OPCODE_TXP, /* X X */ + OPCODE_TXP_NV, /* 3 X */ + OPCODE_TRUNC, /* X */ + OPCODE_UP2H, /* X */ + OPCODE_UP2US, /* X */ + OPCODE_UP4B, /* X */ + OPCODE_UP4UB, /* X */ + OPCODE_X2D, /* X */ + OPCODE_XOR, /* */ + OPCODE_XPD, /* X X X */ + MAX_OPCODE + } gl_inst_opcode; + + + /** + * Number of bits for the src/dst register Index field. + * This limits the size of temp/uniform register files. + */ + #define INST_INDEX_BITS 10 + + + /** + * Instruction source register. + */ + struct prog_src_register + { + GLuint File:4; /**< One of the PROGRAM_* register file values. */ + GLint Index:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit. + * May be negative for relative addressing. + */ + GLuint Swizzle:12; + GLuint RelAddr:1; + + /** Take the component-wise absolute value */ + GLuint Abs:1; + + /** + * Post-Abs negation. + * This will either be NEGATE_NONE or NEGATE_XYZW, except for the SWZ + * instruction which allows per-component negation. + */ + GLuint Negate:4; + + /** + * Is the register two-dimensional. + * Two dimensional registers are of the + * REGISTER[index][index2] format. + * They are used by the geometry shaders where + * the first index is the index within an array + * and the second index is the semantic of the + * array, e.g. gl_PositionIn[index] would become + * INPUT[index][gl_PositionIn] + */ + GLuint HasIndex2:1; + GLuint RelAddr2:1; + GLint Index2:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit. + * May be negative for relative + * addressing. */ + }; + + + /** + * Instruction destination register. 
+ */ + struct prog_dst_register + { + GLuint File:4; /**< One of the PROGRAM_* register file values */ + GLuint Index:INST_INDEX_BITS; /**< Unsigned, never negative */ + GLuint WriteMask:4; + GLuint RelAddr:1; + + /** + * \name Conditional destination update control. + * + * \since + * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2, + * NV_vertex_program2_option. + */ + /*@{*/ + /** + * Takes one of the 9 possible condition values (EQ, FL, GT, GE, LE, LT, + * NE, TR, or UN). Dest reg is only written to if the matching + * (swizzled) condition code value passes. When a conditional update mask + * is not specified, this will be \c COND_TR. + */ + GLuint CondMask:4; + + /** + * Condition code swizzle value. + */ + GLuint CondSwizzle:12; + + /** + * Selects the condition code register to use for conditional destination + * update masking. In NV_fragment_program or NV_vertex_program2 mode, only + * condition code register 0 is available. In NV_vertex_program3 mode, + * condition code registers 0 and 1 are available. + */ + GLuint CondSrc:1; + /*@}*/ + }; + + + /** + * Vertex/fragment program instruction. + */ + struct prog_instruction + { + gl_inst_opcode Opcode; + struct prog_src_register SrcReg[3]; + struct prog_dst_register DstReg; + + /** + * Indicates that the instruction should update the condition code + * register. + * + * \since + * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2, + * NV_vertex_program2_option. + */ + GLuint CondUpdate:1; + + /** + * If prog_instruction::CondUpdate is \c GL_TRUE, this value selects the + * condition code register that is to be updated. + * + * In GL_NV_fragment_program or GL_NV_vertex_program2 mode, only condition + * code register 0 is available. In GL_NV_vertex_program3 mode, condition + * code registers 0 and 1 are available. + * + * \since + * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2, + * NV_vertex_program2_option.
+ */ + GLuint CondDst:1; + + /** + * Saturate each value of the vectored result to the range [0,1] or the + * range [-1,1]. \c SSAT mode (i.e., saturation to the range [-1,1]) is + * only available in NV_fragment_program2 mode. + * Value is one of the SATURATE_* tokens. + * + * \since + * NV_fragment_program, NV_fragment_program_option, NV_vertex_program3. + */ + GLuint SaturateMode:2; + + /** + * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. + * + * \since + * NV_fragment_program, NV_fragment_program_option. + */ + GLuint Precision:3; + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + GLuint TexSrcUnit:5; + + /** Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX */ + GLuint TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + GLuint TexShadow:1; + /*@}*/ + + /** + * For BRA and CAL instructions, the location to jump to. + * For BGNLOOP, points to ENDLOOP (and vice-versa). - * For BRK, points to BGNLOOP (which points to ENDLOOP). ++ * For BRK, points to ENDLOOP + * For IF, points to ELSE or ENDIF. + * For ELSE, points to ENDIF. + */ + GLint BranchTarget; + + /** for debugging purposes */ + const char *Comment; + + /** Arbitrary data. 
Used for OPCODE_PRINT and some drivers */ + void *Data; + + /** for driver use (try to remove someday) */ + GLint Aux; + }; + + + extern void + _mesa_init_instructions(struct prog_instruction *inst, GLuint count); + + extern struct prog_instruction * + _mesa_alloc_instructions(GLuint numInst); + + extern struct prog_instruction * + _mesa_realloc_instructions(struct prog_instruction *oldInst, + GLuint numOldInst, GLuint numNewInst); + + extern struct prog_instruction * + _mesa_copy_instructions(struct prog_instruction *dest, + const struct prog_instruction *src, GLuint n); + + extern void + _mesa_free_instructions(struct prog_instruction *inst, GLuint count); + + extern GLuint + _mesa_num_inst_src_regs(gl_inst_opcode opcode); + + extern GLuint + _mesa_num_inst_dst_regs(gl_inst_opcode opcode); + + extern GLboolean + _mesa_is_tex_instruction(gl_inst_opcode opcode); + + extern GLboolean + _mesa_check_soa_dependencies(const struct prog_instruction *inst); + + extern const char * + _mesa_opcode_string(gl_inst_opcode opcode); + + + #endif /* PROG_INSTRUCTION_H */ diff --cc src/mesa/program/prog_optimize.c index 00000000000,2941a17da3f..bd120b8643c mode 000000,100644..100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@@ -1,0 -1,1035 +1,1041 @@@ + /* + * Mesa 3-D graphics library + * Version: 7.5 + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + + + #include "main/glheader.h" + #include "main/context.h" + #include "main/macros.h" + #include "program.h" + #include "prog_instruction.h" + #include "prog_optimize.h" + #include "prog_print.h" + + + #define MAX_LOOP_NESTING 50 + + + static GLboolean dbg = GL_FALSE; + + /* Returns the mask of channels read from the given srcreg in this instruction. 
+ */ + static GLuint + get_src_arg_mask(const struct prog_instruction *inst, int arg) + { + int writemask = inst->DstReg.WriteMask; + + if (inst->CondUpdate) + writemask = WRITEMASK_XYZW; + + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_SUB: + return writemask; + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + return WRITEMASK_X; + case OPCODE_DP2: + return WRITEMASK_XY; + case OPCODE_DP3: + case OPCODE_XPD: + return WRITEMASK_XYZ; + default: + return WRITEMASK_XYZW; + } + } + + /** + * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. + * \return number of instructions removed + */ + static GLuint + remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) + { + GLint i, removeEnd = 0, removeCount = 0; + GLuint totalRemoved = 0; + + /* go backward */ + for (i = prog->NumInstructions - 1; i >= 0; i--) { + if (removeFlags[i]) { + totalRemoved++; + if (removeCount == 0) { + /* begin a run of instructions to remove */ + removeEnd = i; + removeCount = 1; + } + else { + /* extend the run of instructions to remove */ + removeCount++; + } + } + else { + /* don't remove this instruction, but check if the preceeding + * instructions are to be removed. + */ + if (removeCount > 0) { + GLint removeStart = removeEnd - removeCount + 1; + _mesa_delete_instructions(prog, removeStart, removeCount); + removeStart = removeCount = 0; /* reset removal info */ + } + } + } + /* Finish removing if the first instruction was to be removed. */ + if (removeCount > 0) { + GLint removeStart = removeEnd - removeCount + 1; + _mesa_delete_instructions(prog, removeStart, removeCount); + } + return totalRemoved; + } + + + /** + * Remap register indexes according to map. 
+ * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ + static void + replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) + { + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } + } + + + /** + * Consolidate temporary registers to use low numbers. For example, if the + * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. + */ + static void + _mesa_consolidate_registers(struct gl_program *prog) + { + GLboolean tempUsed[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; + GLuint tempMax = 0, i; + + if (dbg) { + printf("Optimize: Begin register consolidation\n"); + } + + memset(tempUsed, 0, sizeof(tempUsed)); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + + /* set tempUsed[i] if temporary [i] is referenced */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + ASSERT(index < MAX_PROGRAM_TEMPS); + tempUsed[index] = GL_TRUE; + tempMax = MAX2(tempMax, index); + break; + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + ASSERT(index < MAX_PROGRAM_TEMPS); + tempUsed[index] = GL_TRUE; + tempMax = MAX2(tempMax, index); + } + 
} + + /* allocate a new index for each temp that's used */ + { + GLuint freeTemp = 0; + for (i = 0; i <= tempMax; i++) { + if (tempUsed[i]) { + tempMap[i] = freeTemp++; + /*printf("replace %u with %u\n", i, tempMap[i]);*/ + } + } + if (freeTemp == tempMax + 1) { + /* no consolidation possible */ + return; + } + if (dbg) { + printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1); + } + } + + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + + if (dbg) { + printf("Optimize: End register consolidation\n"); + } + } + + + /** + * Remove dead instructions from the given program. + * This is very primitive for now. Basically look for temp registers + * that are written to but never read. Remove any instructions that + * write to such registers. Be careful with condition code setters. + */ + static void + _mesa_remove_dead_code(struct gl_program *prog) + { + GLboolean tempRead[MAX_PROGRAM_TEMPS][4]; + GLboolean *removeInst; /* per-instruction removal flag */ + GLuint i, rem = 0, comp; + + memset(tempRead, 0, sizeof(tempRead)); + + if (dbg) { + printf("Optimize: Begin dead code removal\n"); + /*_mesa_print_program(prog);*/ + } + + removeInst = (GLboolean *) + calloc(1, prog->NumInstructions * sizeof(GLboolean)); + + /* Determine which temps are read and written */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + + /* check src regs */ + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + GLuint read_mask; + ASSERT(index < MAX_PROGRAM_TEMPS); + read_mask = get_src_arg_mask(inst, j); + + if (inst->SrcReg[j].RelAddr) { + if (dbg) + printf("abort remove dead code (indirect temp)\n"); + goto done; + } + + for (comp = 0; comp < 4; comp++) { + GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7; + + if ((read_mask & (1 << comp)) == 0) + continue; + + 
switch (swz) { + case SWIZZLE_X: + tempRead[index][0] = GL_TRUE; + break; + case SWIZZLE_Y: + tempRead[index][1] = GL_TRUE; + break; + case SWIZZLE_Z: + tempRead[index][2] = GL_TRUE; + break; + case SWIZZLE_W: + tempRead[index][3] = GL_TRUE; + break; + } + } + } + } + + /* check dst reg */ + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + ASSERT(index < MAX_PROGRAM_TEMPS); + + if (inst->DstReg.RelAddr) { + if (dbg) + printf("abort remove dead code (indirect temp)\n"); + goto done; + } + + if (inst->CondUpdate) { + /* If we're writing to this register and setting condition + * codes we cannot remove the instruction. Prevent removal + * by setting the 'read' flag. + */ + tempRead[index][0] = GL_TRUE; + tempRead[index][1] = GL_TRUE; + tempRead[index][2] = GL_TRUE; + tempRead[index][3] = GL_TRUE; + } + } + } + + /* find instructions that write to dead registers, flag for removal */ + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); + + if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { + GLint chan, index = inst->DstReg.Index; + + for (chan = 0; chan < 4; chan++) { + if (!tempRead[index][chan] && + inst->DstReg.WriteMask & (1 << chan)) { + if (dbg) { + printf("Remove writemask on %u.%c\n", i, + chan == 3 ? 'w' : 'x' + chan); + } + inst->DstReg.WriteMask &= ~(1 << chan); + rem++; + } + } + + if (inst->DstReg.WriteMask == 0) { + /* If we cleared all writes, the instruction can be removed. 
*/ + if (dbg) + printf("Remove instruction %u: \n", i); + removeInst[i] = GL_TRUE; + } + } + } + + /* now remove the instructions which aren't needed */ + rem = remove_instructions(prog, removeInst); + + if (dbg) { + printf("Optimize: End dead code removal.\n"); + printf(" %u channel writes removed\n", rem); + printf(" %u instructions removed\n", rem); + /*_mesa_print_program(prog);*/ + } + + done: + free(removeInst); + } + + + enum temp_use + { + READ, + WRITE, + FLOW, + END + }; + + /** + * Scan forward in program from 'start' for the next occurance of TEMP[index]. + * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator + * that we can't look further. + */ + static enum temp_use + find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) + { + GLuint i; + + for (i = start; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + switch (inst->Opcode) { + case OPCODE_BGNLOOP: + case OPCODE_ENDLOOP: + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: + return FLOW; + default: + { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY && + inst->SrcReg[j].Index == index) + return READ; + } + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == index) + return WRITE; + } + } + } + + return END; + } + + static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) + { + switch (opcode) { + case OPCODE_BGNLOOP: + case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_IF: + case OPCODE_ELSE: + case OPCODE_END: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSUB: + case OPCODE_RET: + return GL_TRUE; + default: + return GL_FALSE; + } + } + + /** + * Try to remove use of extraneous MOV instructions, to free them up for dead + * code removal. 
+ */ + static void + _mesa_remove_extra_move_use(struct gl_program *prog) + { + GLuint i, j; + + if (dbg) { + printf("Optimize: Begin remove extra move use\n"); + _mesa_print_program(prog); + } + + /* + * Look for sequences such as this: + * MOV tmpX, arg0; + * ... + * FOO tmpY, tmpX, arg1; + * and convert into: + * MOV tmpX, arg0; + * ... + * FOO tmpY, arg0, arg1; + */ + + for (i = 0; i + 1 < prog->NumInstructions; i++) { + const struct prog_instruction *mov = prog->Instructions + i; + + if (mov->Opcode != OPCODE_MOV || + mov->DstReg.File != PROGRAM_TEMPORARY || + mov->DstReg.RelAddr || + mov->DstReg.CondMask != COND_TR || + mov->SaturateMode != SATURATE_OFF || + mov->SrcReg[0].RelAddr) + continue; + + /* Walk through remaining instructions until the or src reg gets + * rewritten or we get into some flow-control, eliminating the use of + * this MOV. + */ + for (j = i + 1; j < prog->NumInstructions; j++) { + struct prog_instruction *inst2 = prog->Instructions + j; + GLuint arg; + + if (_mesa_is_flow_control_opcode(inst2->Opcode)) + break; + + /* First rewrite this instruction's args if appropriate. */ + for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { + int comp; + int read_mask = get_src_arg_mask(inst2, arg); + + if (inst2->SrcReg[arg].File != mov->DstReg.File || + inst2->SrcReg[arg].Index != mov->DstReg.Index || + inst2->SrcReg[arg].RelAddr || + inst2->SrcReg[arg].Abs) + continue; + + /* Check that all the sources for this arg of inst2 come from inst1 + * or constants. + */ + for (comp = 0; comp < 4; comp++) { + int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + + /* If the MOV didn't write that channel, can't use it. 
*/ + if ((read_mask & (1 << comp)) && + src_swz <= SWIZZLE_W && + (mov->DstReg.WriteMask & (1 << src_swz)) == 0) + break; + } + if (comp != 4) + continue; + + /* Adjust the swizzles of inst2 to point at MOV's source */ + for (comp = 0; comp < 4; comp++) { + int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + + if (inst2_swz <= SWIZZLE_W) { + GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); + inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); + inst2->SrcReg[arg].Swizzle |= s << (3 * comp); + inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> + inst2_swz) & 0x1) << comp); + } + } + inst2->SrcReg[arg].File = mov->SrcReg[0].File; + inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; + } + + /* If this instruction overwrote part of the move, our time is up. */ + if ((inst2->DstReg.File == mov->DstReg.File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->DstReg.Index)) || + (inst2->DstReg.File == mov->SrcReg[0].File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->SrcReg[0].Index))) + break; + } + } + + if (dbg) { + printf("Optimize: End remove extra move use.\n"); + /*_mesa_print_program(prog);*/ + } + } + + /** + * Try to remove extraneous MOV instructions from the given program. 
+ */ + static void + _mesa_remove_extra_moves(struct gl_program *prog) + { + GLboolean *removeInst; /* per-instruction removal flag */ + GLuint i, rem, loopNesting = 0, subroutineNesting = 0; + + if (dbg) { + printf("Optimize: Begin remove extra moves\n"); + _mesa_print_program(prog); + } + + removeInst = (GLboolean *) + calloc(1, prog->NumInstructions * sizeof(GLboolean)); + + /* + * Look for sequences such as this: + * FOO tmpX, arg0, arg1; + * MOV tmpY, tmpX; + * and convert into: + * FOO tmpY, arg0, arg1; + */ + + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + + switch (inst->Opcode) { + case OPCODE_BGNLOOP: + loopNesting++; + break; + case OPCODE_ENDLOOP: + loopNesting--; + break; + case OPCODE_BGNSUB: + subroutineNesting++; + break; + case OPCODE_ENDSUB: + subroutineNesting--; + break; + case OPCODE_MOV: + if (i > 0 && + loopNesting == 0 && + subroutineNesting == 0 && + inst->SrcReg[0].File == PROGRAM_TEMPORARY && + inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) { + /* see if this MOV can be removed */ + const GLuint tempIndex = inst->SrcReg[0].Index; + struct prog_instruction *prevInst; + GLuint prevI; + + /* get pointer to previous instruction */ + prevI = i - 1; + while (prevI > 0 && removeInst[prevI]) + prevI--; + prevInst = prog->Instructions + prevI; + + if (prevInst->DstReg.File == PROGRAM_TEMPORARY && + prevInst->DstReg.Index == tempIndex && + prevInst->DstReg.WriteMask == WRITEMASK_XYZW) { + + enum temp_use next_use = + find_next_temp_use(prog, i + 1, tempIndex); + + if (next_use == WRITE || next_use == END) { + /* OK, we can safely remove this MOV instruction. 
+ * Transform: + * prevI: FOO tempIndex, x, y; + * i: MOV z, tempIndex; + * Into: + * prevI: FOO z, x, y; + */ + + /* patch up prev inst */ + prevInst->DstReg.File = inst->DstReg.File; + prevInst->DstReg.Index = inst->DstReg.Index; + + /* flag this instruction for removal */ + removeInst[i] = GL_TRUE; + + if (dbg) { + printf("Remove MOV at %u\n", i); + printf("new prev inst %u: ", prevI); + _mesa_print_instruction(prevInst); + } + } + } + } + break; + default: + ; /* nothing */ + } + } + + /* now remove the instructions which aren't needed */ + rem = remove_instructions(prog, removeInst); + + free(removeInst); + + if (dbg) { + printf("Optimize: End remove extra moves. %u instructions removed\n", rem); + /*_mesa_print_program(prog);*/ + } + } + + + /** A live register interval */ + struct interval + { + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ + }; + + + /** A list of register intervals */ + struct interval_list + { + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; + }; + + + static void + append_interval(struct interval_list *list, const struct interval *inv) + { + list->Intervals[list->Num++] = *inv; + } + + + /** Insert interval inv into list, sorted by interval end */ + static void + insert_interval_by_end(struct interval_list *list, const struct interval *inv) + { + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + + #ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } + #endif + } + + + /** Remove the given interval from the interval list */ + static void + remove_interval(struct interval_list *list, const struct interval *inv) + { + /* XXX we could binary search since list is sorted */ 
+ GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; + return; + } + } + } + + + /** called by qsort() */ + static int + compare_start(const void *a, const void *b) + { + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; + } + + /** sort the interval list according to interval starts */ + static void + sort_interval_list_by_start(struct interval_list *list) + { + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); + #ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } + #endif + } + ++struct loop_info ++{ ++ GLuint Start, End; /**< Start, end instructions of loop */ ++}; + + /** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ + static void -update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) ++update_interval(GLint intBegin[], GLint intEnd[], ++ struct loop_info *loopStack, GLuint loopStackDepth, ++ GLuint index, GLuint ic) + { ++ int i; ++ ++ /* If the register is used in a loop, extend its lifetime through the end ++ * of the outermost loop that doesn't contain its definition. 
++ */ ++ for (i = 0; i < loopStackDepth; i++) { ++ if (intBegin[index] < loopStack[i].Start) { ++ ic = loopStack[i].End; ++ break; ++ } ++ } ++ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } + } + + + /** + * Find first/last instruction that references each temporary register. + */ + GLboolean + _mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) + { - struct loop_info - { - GLuint Start, End; /**< Start, end instructions of loop */ - }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLuint i; + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; - update_interval(intBegin, intEnd, index, i); - if (loopStackDepth > 0) { - /* extend temp register's interval to end of loop */ - GLuint loopEnd = loopStack[loopStackDepth - 1].End; - update_interval(intBegin, intEnd, index, loopEnd); - } ++ update_interval(intBegin, intEnd, loopStack, loopStackDepth, ++ index, i); + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + 
if (inst->DstReg.RelAddr) + return GL_FALSE; - update_interval(intBegin, intEnd, index, i); - if (loopStackDepth > 0) { - /* extend temp register's interval to end of loop */ - GLuint loopEnd = loopStack[loopStackDepth - 1].End; - update_interval(intBegin, intEnd, index, loopEnd); - } ++ update_interval(intBegin, intEnd, loopStack, loopStackDepth, ++ index, i); + } + } + } + + return GL_TRUE; + } + + + /** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ + static GLboolean + find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) + { + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. 
+ */ + + if (dbg) { + printf("Optimize: Begin find intervals\n"); + } + + /* build intermediate arrays */ + if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, + intBegin, intEnd)) + return GL_FALSE; + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; i++) { + const struct interval *inv = liveIntervals->Intervals + i; + printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + GLuint j; + for (j = 0; j < inv->Start; j++) + printf(" "); + for (j = inv->Start; j <= inv->End; j++) + printf("x"); + } + printf("\n"); + } + } + + return GL_TRUE; + } + + + /** Scan the array of used register flags to find free entry */ + static GLint + alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) + { + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return -1; + } + + + /** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. 
+ */ + static void + _mesa_reallocate_registers(struct gl_program *prog) + { + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLint maxTemp = -1; + + if (dbg) { + printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. + */ + { + GLint j; + for (j = 0; j < (GLint) activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. 
+ */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLint k = alloc_register(usedRegs); + if (k < 0) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + printf(" remap register %u -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < (GLint) liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + printf("Optimize: End live-interval register reallocation\n"); + printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } + } + + + /** + * Apply optimizations to the given program to eliminate unnecessary + * instructions, temp regs, etc. 
+ */ + void + _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) + { + _mesa_remove_extra_move_use(program); + + if (1) + _mesa_remove_dead_code(program); + + if (0) /* not tested much yet */ + _mesa_remove_extra_moves(program); + + if (0) + _mesa_consolidate_registers(program); + else + _mesa_reallocate_registers(program); + } diff --cc src/mesa/program/prog_parameter.c index 00000000000,aac488c79ab..ddbfe95c152 mode 000000,100644..100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@@ -1,0 -1,751 +1,751 @@@ + /* + * Mesa 3-D graphics library + * Version: 7.3 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + /** + * \file prog_parameter.c + * Program parameter lists and functions. 
+ * \author Brian Paul + */ + + + #include "main/glheader.h" + #include "main/imports.h" + #include "main/macros.h" + #include "prog_instruction.h" + #include "prog_parameter.h" + #include "prog_statevars.h" + + + struct gl_program_parameter_list * + _mesa_new_parameter_list(void) + { + return CALLOC_STRUCT(gl_program_parameter_list); + } + + + struct gl_program_parameter_list * + _mesa_new_parameter_list_sized(unsigned size) + { + struct gl_program_parameter_list *p = _mesa_new_parameter_list(); + + if ((p != NULL) && (size != 0)) { + p->Size = size; + + /* alloc arrays */ + p->Parameters = (struct gl_program_parameter *) + calloc(1, size * sizeof(struct gl_program_parameter)); + + p->ParameterValues = (GLfloat (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + + + if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { + free(p->Parameters); + _mesa_align_free(p->ParameterValues); + free(p); + p = NULL; + } + } + + return p; + } + + + /** + * Free a parameter list and all its parameters + */ + void + _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) + { + GLuint i; + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Name) + free((void *) paramList->Parameters[i].Name); + } + free(paramList->Parameters); + if (paramList->ParameterValues) + _mesa_align_free(paramList->ParameterValues); + free(paramList); + } + + + /** + * Add a new parameter to a parameter list. + * Note that parameter values are usually 4-element GLfloat vectors. + * When size > 4 we'll allocate a sequential block of parameters to + * store all the values (in blocks of 4). + * + * \param paramList the list to add the parameter to + * \param type type of parameter, such as + * \param name the parameter name, will be duplicated/copied! + * \param size number of elements in 'values' vector (1..4, or more) + * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. 
+ * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param state state indexes, or NULL + * \return index of new parameter in the list, or -1 if error (out of mem) + */ + GLint + _mesa_add_parameter(struct gl_program_parameter_list *paramList, + gl_register_file type, const char *name, + GLuint size, GLenum datatype, const GLfloat *values, + const gl_state_index state[STATE_LENGTH], + GLbitfield flags) + { + const GLuint oldNum = paramList->NumParameters; + const GLuint sz4 = (size + 3) / 4; /* no. of new param slots needed */ + + assert(size > 0); + + if (oldNum + sz4 > paramList->Size) { + /* Need to grow the parameter list array (alloc some extra) */ + paramList->Size = paramList->Size + 4 * sz4; + + /* realloc arrays */ + paramList->Parameters = (struct gl_program_parameter *) + _mesa_realloc(paramList->Parameters, + oldNum * sizeof(struct gl_program_parameter), + paramList->Size * sizeof(struct gl_program_parameter)); + + paramList->ParameterValues = (GLfloat (*)[4]) + _mesa_align_realloc(paramList->ParameterValues, /* old buf */ + oldNum * 4 * sizeof(GLfloat), /* old size */ + paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + 16); + } + + if (!paramList->Parameters || + !paramList->ParameterValues) { + /* out of memory */ + paramList->NumParameters = 0; + paramList->Size = 0; + return -1; + } + else { + GLuint i; + + paramList->NumParameters = oldNum + sz4; + + memset(&paramList->Parameters[oldNum], 0, + sz4 * sizeof(struct gl_program_parameter)); + + for (i = 0; i < sz4; i++) { + struct gl_program_parameter *p = paramList->Parameters + oldNum + i; + p->Name = name ? 
_mesa_strdup(name) : NULL; + p->Type = type; + p->Size = size; + p->DataType = datatype; + p->Flags = flags; + if (values) { + COPY_4V(paramList->ParameterValues[oldNum + i], values); + values += 4; + p->Initialized = GL_TRUE; + } + else { + /* silence valgrind */ + ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + } + size -= 4; + } + + if (state) { + for (i = 0; i < STATE_LENGTH; i++) + paramList->Parameters[oldNum].StateIndexes[i] = state[i]; + } + + return (GLint) oldNum; + } + } + + + /** + * Add a new named program parameter (Ex: NV_fragment_program DEFINE statement) + * \return index of the new entry in the parameter list + */ + GLint + _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, + const char *name, const GLfloat values[4]) + { + return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, + 4, GL_NONE, values, NULL, 0x0); + + } + + + /** + * Add a new named constant to the parameter list. + * This will be used when the program contains something like this: + * PARAM myVals = { 0, 1, 2, 3 }; + * + * \param paramList the parameter list + * \param name the name for the constant + * \param values four float values + * \return index/position of the new parameter in the parameter list + */ + GLint + _mesa_add_named_constant(struct gl_program_parameter_list *paramList, + const char *name, const GLfloat values[4], + GLuint size) + { + /* first check if this is a duplicate constant */ + GLint pos; + for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { + const GLfloat *pvals = paramList->ParameterValues[pos]; + if (pvals[0] == values[0] && + pvals[1] == values[1] && + pvals[2] == values[2] && + pvals[3] == values[3] && + strcmp(paramList->Parameters[pos].Name, name) == 0) { + /* Same name and value is already in the param list - reuse it */ + return pos; + } + } + /* not found, add new parameter */ + return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name, + size, GL_NONE, values, NULL, 0x0); + } + + + /** + 
* Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. + */ + GLint + _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const GLfloat values[4], GLuint size, + GLuint *swizzleOut) + { + GLint pos; + ASSERT(size >= 1); + ASSERT(size <= 4); + + if (swizzleOut && + _mesa_lookup_parameter_constant(paramList, values, + size, &pos, swizzleOut)) { + return pos; + } + + /* Look for empty space in an already unnamed constant parameter + * to add this constant. This will only work for single-element + * constants because we rely on smearing (i.e. .yyyy or .zzzz). + */ + if (size == 1 && swizzleOut) { + for (pos = 0; pos < (GLint) paramList->NumParameters; pos++) { + struct gl_program_parameter *p = paramList->Parameters + pos; + if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { + /* ok, found room */ + GLfloat *pVal = paramList->ParameterValues[pos]; + GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ + pVal[p->Size] = values[0]; + p->Size++; + *swizzleOut = MAKE_SWIZZLE4(swz, swz, swz, swz); + return pos; + } + } + } + + /* add a new parameter to store this constant */ + pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, + size, GL_NONE, values, NULL, 0x0); + if (pos >= 0 && swizzleOut) { + if (size == 1) + *swizzleOut = SWIZZLE_XXXX; + else + *swizzleOut = SWIZZLE_NOOP; + } + return pos; + } + + + /** + * Add a uniform to the parameter list. + * Note that if the uniform is an array, size may be greater than + * what's implied by the datatype. 
+ * \param name uniform's name + * \param size number of floats to allocate + * \param datatype GL_FLOAT_VEC3, GL_FLOAT_MAT4, etc. + */ + GLint + _mesa_add_uniform(struct gl_program_parameter_list *paramList, + const char *name, GLuint size, GLenum datatype, + const GLfloat *values) + { + GLint i = _mesa_lookup_parameter_index(paramList, -1, name); + ASSERT(datatype != GL_NONE); + if (i >= 0 && paramList->Parameters[i].Type == PROGRAM_UNIFORM) { + ASSERT(paramList->Parameters[i].Size == size); + ASSERT(paramList->Parameters[i].DataType == datatype); + /* already in list */ + return i; + } + else { + i = _mesa_add_parameter(paramList, PROGRAM_UNIFORM, name, + size, datatype, values, NULL, 0x0); + return i; + } + } + + + /** + * Mark the named uniform as 'used'. + */ + void + _mesa_use_uniform(struct gl_program_parameter_list *paramList, + const char *name) + { + GLuint i; + for (i = 0; i < paramList->NumParameters; i++) { + struct gl_program_parameter *p = paramList->Parameters + i; + if ((p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) && + strcmp(p->Name, name) == 0) { + p->Used = GL_TRUE; + /* Note that large uniforms may occupy several slots so we're + * not done searching yet. + */ + } + } + } + + + /** + * Add a sampler to the parameter list. + * \param name uniform's name + * \param datatype GL_SAMPLER_2D, GL_SAMPLER_2D_RECT_ARB, etc. 
+ * \param index the sampler number (as seen in TEX instructions) + * \return sampler index (starting at zero) or -1 if error + */ + GLint + _mesa_add_sampler(struct gl_program_parameter_list *paramList, + const char *name, GLenum datatype) + { + GLint i = _mesa_lookup_parameter_index(paramList, -1, name); + if (i >= 0 && paramList->Parameters[i].Type == PROGRAM_SAMPLER) { + ASSERT(paramList->Parameters[i].Size == 1); + ASSERT(paramList->Parameters[i].DataType == datatype); + /* already in list */ + return (GLint) paramList->ParameterValues[i][0]; + } + else { + GLuint i; + const GLint size = 1; /* a sampler is basically a texture unit number */ + GLfloat value[4]; + GLint numSamplers = 0; + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Type == PROGRAM_SAMPLER) + numSamplers++; + } + value[0] = (GLfloat) numSamplers; + value[1] = value[2] = value[3] = 0.0F; + (void) _mesa_add_parameter(paramList, PROGRAM_SAMPLER, name, + size, datatype, value, NULL, 0x0); + return numSamplers; + } + } + + + /** + * Add parameter representing a varying variable. + */ + GLint + _mesa_add_varying(struct gl_program_parameter_list *paramList, + const char *name, GLuint size, GLenum datatype, + GLbitfield flags) + { + GLint i = _mesa_lookup_parameter_index(paramList, -1, name); + if (i >= 0 && paramList->Parameters[i].Type == PROGRAM_VARYING) { + /* already in list */ + return i; + } + else { + /*assert(size == 4);*/ + i = _mesa_add_parameter(paramList, PROGRAM_VARYING, name, + size, datatype, NULL, NULL, flags); + return i; + } + } + + + /** + * Add parameter representing a vertex program attribute. 
+ * \param size size of attribute (in floats), may be -1 if unknown + * \param attrib the attribute index, or -1 if unknown + */ + GLint + _mesa_add_attribute(struct gl_program_parameter_list *paramList, + const char *name, GLint size, GLenum datatype, GLint attrib) + { + GLint i = _mesa_lookup_parameter_index(paramList, -1, name); + if (i >= 0) { + /* replace */ + if (attrib < 0) + attrib = i; + paramList->Parameters[i].StateIndexes[0] = attrib; + } + else { + /* add */ + gl_state_index state[STATE_LENGTH]; + state[0] = (gl_state_index) attrib; + if (size < 0) + size = 4; + i = _mesa_add_parameter(paramList, PROGRAM_INPUT, name, + size, datatype, NULL, state, 0x0); + } + return i; + } + + + + #if 0 /* not used yet */ + /** + * Returns the number of 4-component registers needed to store a piece + * of GL state. For matrices this may be as many as 4 registers, + * everything else needs + * just 1 register. + */ + static GLuint + sizeof_state_reference(const GLint *stateTokens) + { + if (stateTokens[0] == STATE_MATRIX) { + GLuint rows = stateTokens[4] - stateTokens[3] + 1; + assert(rows >= 1); + assert(rows <= 4); + return rows; + } + else { + return 1; + } + } + #endif + + + /** + * Add a new state reference to the parameter list. + * This will be used when the program contains something like this: + * PARAM ambient = state.material.front.ambient; + * + * \param paramList the parameter list + * \param stateTokens an array of 5 (STATE_LENGTH) state tokens + * \return index of the new parameter. 
+ */ + GLint + _mesa_add_state_reference(struct gl_program_parameter_list *paramList, + const gl_state_index stateTokens[STATE_LENGTH]) + { + const GLuint size = 4; /* XXX fix */ + char *name; + GLint index; + + /* Check if the state reference is already in the list */ + for (index = 0; index < (GLint) paramList->NumParameters; index++) { + GLuint i, match = 0; + for (i = 0; i < STATE_LENGTH; i++) { + if (paramList->Parameters[index].StateIndexes[i] == stateTokens[i]) { + match++; + } + else { + break; + } + } + if (match == STATE_LENGTH) { + /* this state reference is already in the parameter list */ + return index; + } + } + + name = _mesa_program_state_string(stateTokens); + index = _mesa_add_parameter(paramList, PROGRAM_STATE_VAR, name, + size, GL_NONE, + NULL, (gl_state_index *) stateTokens, 0x0); + paramList->StateFlags |= _mesa_program_state_flags(stateTokens); + + /* free name string here since we duplicated it in add_parameter() */ + free(name); + + return index; + } + + + /** + * Lookup a parameter value by name in the given parameter list. + * \return pointer to the float[4] values. + */ + GLfloat * + _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, + GLsizei nameLen, const char *name) + { + GLint i = _mesa_lookup_parameter_index(paramList, nameLen, name); + if (i < 0) + return NULL; + else + return paramList->ParameterValues[i]; + } + + + /** + * Given a program parameter name, find its position in the list of parameters. + * \param paramList the parameter list to search + * \param nameLen length of name (in chars). + * If length is negative, assume that name is null-terminated. + * \param name the name to search for + * \return index of parameter in the list. 
+ */ + GLint + _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, + GLsizei nameLen, const char *name) + { + GLint i; + + if (!paramList) + return -1; + + if (nameLen == -1) { + /* name is null-terminated */ + for (i = 0; i < (GLint) paramList->NumParameters; i++) { + if (paramList->Parameters[i].Name && + strcmp(paramList->Parameters[i].Name, name) == 0) + return i; + } + } + else { + /* name is not null-terminated, use nameLen */ + for (i = 0; i < (GLint) paramList->NumParameters; i++) { + if (paramList->Parameters[i].Name && + strncmp(paramList->Parameters[i].Name, name, nameLen) == 0 + && strlen(paramList->Parameters[i].Name) == (size_t)nameLen) + return i; + } + } + return -1; + } + + + /** + * Look for a float vector in the given parameter list. The float vector + * may be of length 1, 2, 3 or 4. If swizzleOut is non-null, we'll try + * swizzling to find a match. + * \param list the parameter list to search + * \param v the float vector to search for + * \param vSize number of element in v + * \param posOut returns the position of the constant, if found + * \param swizzleOut returns a swizzle mask describing location of the + * vector elements if found. 
+ * \return GL_TRUE if found, GL_FALSE if not found + */ + GLboolean + _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, + const GLfloat v[], GLuint vSize, + GLint *posOut, GLuint *swizzleOut) + { + GLuint i; + + assert(vSize >= 1); + assert(vSize <= 4); + + if (!list) + return -1; + + for (i = 0; i < list->NumParameters; i++) { + if (list->Parameters[i].Type == PROGRAM_CONSTANT) { + if (!swizzleOut) { + /* swizzle not allowed */ + GLuint j, match = 0; + for (j = 0; j < vSize; j++) { + if (v[j] == list->ParameterValues[i][j]) + match++; + } + if (match == vSize) { + *posOut = i; + return GL_TRUE; + } + } + else { + /* try matching w/ swizzle */ + if (vSize == 1) { + /* look for v[0] anywhere within float[4] value */ + GLuint j; - for (j = 0; j < 4; j++) { ++ for (j = 0; j < list->Parameters[i].Size; j++) { + if (list->ParameterValues[i][j] == v[0]) { + /* found it */ + *posOut = i; + *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); + return GL_TRUE; + } + } + } + else if (vSize <= list->Parameters[i].Size) { + /* see if we can match this constant (with a swizzle) */ + GLuint swz[4]; + GLuint match = 0, j, k; + for (j = 0; j < vSize; j++) { + if (v[j] == list->ParameterValues[i][j]) { + swz[j] = j; + match++; + } + else { + for (k = 0; k < list->Parameters[i].Size; k++) { + if (v[j] == list->ParameterValues[i][k]) { + swz[j] = k; + match++; + break; + } + } + } + } + /* smear last value to remaining positions */ + for (; j < 4; j++) + swz[j] = swz[j-1]; + + if (match == vSize) { + *posOut = i; + *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]); + return GL_TRUE; + } + } + } + } + } + + *posOut = -1; + return GL_FALSE; + } + + + struct gl_program_parameter_list * + _mesa_clone_parameter_list(const struct gl_program_parameter_list *list) + { + struct gl_program_parameter_list *clone; + GLuint i; + + clone = _mesa_new_parameter_list(); + if (!clone) + return NULL; + + /** Not too efficient, but correct */ + for (i = 0; i < 
list->NumParameters; i++) { + struct gl_program_parameter *p = list->Parameters + i; + struct gl_program_parameter *pCopy; + GLuint size = MIN2(p->Size, 4); + GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType, + list->ParameterValues[i], NULL, 0x0); + ASSERT(j >= 0); + pCopy = clone->Parameters + j; + pCopy->Used = p->Used; + pCopy->Flags = p->Flags; + /* copy state indexes */ + if (p->Type == PROGRAM_STATE_VAR) { + GLint k; + for (k = 0; k < STATE_LENGTH; k++) { + pCopy->StateIndexes[k] = p->StateIndexes[k]; + } + } + else { + clone->Parameters[j].Size = p->Size; + } + + } + + clone->StateFlags = list->StateFlags; + + return clone; + } + + + /** + * Return a new parameter list which is listA + listB. + */ + struct gl_program_parameter_list * + _mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA, + const struct gl_program_parameter_list *listB) + { + struct gl_program_parameter_list *list; + + if (listA) { + list = _mesa_clone_parameter_list(listA); + if (list && listB) { + GLuint i; + for (i = 0; i < listB->NumParameters; i++) { + struct gl_program_parameter *param = listB->Parameters + i; + _mesa_add_parameter(list, param->Type, param->Name, param->Size, + param->DataType, + listB->ParameterValues[i], + param->StateIndexes, + param->Flags); + } + } + } + else if (listB) { + list = _mesa_clone_parameter_list(listB); + } + else { + list = NULL; + } + return list; + } + + + + /** + * Find longest name of all uniform parameters in list. + */ + GLuint + _mesa_longest_parameter_name(const struct gl_program_parameter_list *list, + gl_register_file type) + { + GLuint i, maxLen = 0; + if (!list) + return 0; + for (i = 0; i < list->NumParameters; i++) { + if (list->Parameters[i].Type == type) { + GLuint len = strlen(list->Parameters[i].Name); + if (len > maxLen) + maxLen = len; + } + } + return maxLen; + } + + + /** + * Count the number of parameters in the list that match the given type. 
+ */ + GLuint + _mesa_num_parameters_of_type(const struct gl_program_parameter_list *list, + gl_register_file type) + { + GLuint i, count = 0; + if (list) { + for (i = 0; i < list->NumParameters; i++) { + if (list->Parameters[i].Type == type) + count++; + } + } + return count; + } diff --cc src/mesa/program/symbol_table.c index 00000000000,6a5d6868974..3fea5ee1f1f mode 000000,100644..100644 --- a/src/mesa/program/symbol_table.c +++ b/src/mesa/program/symbol_table.c @@@ -1,0 -1,362 +1,412 @@@ + /* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include "main/imports.h" + #include "symbol_table.h" + #include "hash_table.h" + + struct symbol { + /** + * Link to the next symbol in the table with the same name + * + * The linked list of symbols with the same name is ordered by scope + * from inner-most to outer-most. 
+ */ + struct symbol *next_with_same_name; + + + /** + * Link to the next symbol in the table with the same scope + * + * The linked list of symbols with the same scope is unordered. Symbols + * in this list may have unique names. + */ + struct symbol *next_with_same_scope; + + + /** + * Header information for the list of symbols with the same name. + */ + struct symbol_header *hdr; + + + /** + * Name space of the symbol + * + * Name spaces are arbitrary user assigned integers. No two symbols can + * exist in the same name space at the same scope level. + */ + int name_space; + - ++ /** Scope depth where this symbol was defined. */ ++ unsigned depth; ++ + /** + * Arbitrary user supplied data. + */ + void *data; + }; + + + /** + */ + struct symbol_header { + /** Linkage in list of all headers in a given symbol table. */ + struct symbol_header *next; + + /** Symbol name. */ + const char *name; + + /** Linked list of symbols with the same name. */ + struct symbol *symbols; + }; + + + /** + * Element of the scope stack. + */ + struct scope_level { + /** Link to next (inner) scope level. */ + struct scope_level *next; + + /** Linked list of symbols with the same scope. */ + struct symbol *symbols; + }; + + + /** + * + */ + struct _mesa_symbol_table { + /** Hash table containing all symbols in the symbol table. */ + struct hash_table *ht; + + /** Top of scope stack. */ + struct scope_level *current_scope; + + /** List of all symbol headers in the table. */ + struct symbol_header *hdr; ++ ++ /** Current scope depth. */ ++ unsigned depth; + }; + + + struct _mesa_symbol_table_iterator { + /** + * Name space of symbols returned by this iterator. + */ + int name_space; + + + /** + * Currently iterated symbol + * + * The next call to \c _mesa_symbol_table_iterator_get will return this + * value. It will also update this value to the value that should be + * returned by the next call. 
+ */ + struct symbol *curr; + }; + + + static void + check_symbol_table(struct _mesa_symbol_table *table) + { + #if 1 + struct scope_level *scope; + + for (scope = table->current_scope; scope != NULL; scope = scope->next) { + struct symbol *sym; + + for (sym = scope->symbols + ; sym != NULL + ; sym = sym->next_with_same_name) { + const struct symbol_header *const hdr = sym->hdr; + struct symbol *sym2; + + for (sym2 = hdr->symbols + ; sym2 != NULL + ; sym2 = sym2->next_with_same_name) { + assert(sym2->hdr == hdr); + } + } + } + #endif + } + + void + _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table) + { + struct scope_level *const scope = table->current_scope; + struct symbol *sym = scope->symbols; + + table->current_scope = scope->next; ++ table->depth--; + + free(scope); + + while (sym != NULL) { + struct symbol *const next = sym->next_with_same_scope; + struct symbol_header *const hdr = sym->hdr; + + assert(hdr->symbols == sym); + + hdr->symbols = sym->next_with_same_name; + + free(sym); + + sym = next; + } + + check_symbol_table(table); + } + + + void + _mesa_symbol_table_push_scope(struct _mesa_symbol_table *table) + { + struct scope_level *const scope = calloc(1, sizeof(*scope)); + + scope->next = table->current_scope; + table->current_scope = scope; ++ table->depth++; + } + + + static struct symbol_header * + find_symbol(struct _mesa_symbol_table *table, const char *name) + { + return (struct symbol_header *) hash_table_find(table->ht, name); + } + + + struct _mesa_symbol_table_iterator * + _mesa_symbol_table_iterator_ctor(struct _mesa_symbol_table *table, + int name_space, const char *name) + { + struct _mesa_symbol_table_iterator *iter = calloc(1, sizeof(*iter)); + struct symbol_header *const hdr = find_symbol(table, name); + + iter->name_space = name_space; + + if (hdr != NULL) { + struct symbol *sym; + + for (sym = hdr->symbols; sym != NULL; sym = sym->next_with_same_name) { + assert(sym->hdr == hdr); + + if ((name_space == -1) || 
(sym->name_space == name_space)) { + iter->curr = sym; + break; + } + } + } + + return iter; + } + + + void + _mesa_symbol_table_iterator_dtor(struct _mesa_symbol_table_iterator *iter) + { + free(iter); + } + + + void * + _mesa_symbol_table_iterator_get(struct _mesa_symbol_table_iterator *iter) + { + return (iter->curr == NULL) ? NULL : iter->curr->data; + } + + + int + _mesa_symbol_table_iterator_next(struct _mesa_symbol_table_iterator *iter) + { + struct symbol_header *hdr; + + if (iter->curr == NULL) { + return 0; + } + + hdr = iter->curr->hdr; + iter->curr = iter->curr->next_with_same_name; + + while (iter->curr != NULL) { + assert(iter->curr->hdr == hdr); + + if ((iter->name_space == -1) + || (iter->curr->name_space == iter->name_space)) { + return 1; + } + + iter->curr = iter->curr->next_with_same_name; + } + + return 0; + } + + ++/** ++ * Determine the scope "distance" of a symbol from the current scope ++ * ++ * \return ++ * A non-negative number for the number of scopes between the current scope ++ * and the scope where a symbol was defined. A value of zero means the current ++ * scope. A negative number if the symbol does not exist. 
++ */ ++int ++_mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table, ++ int name_space, const char *name) ++{ ++ struct symbol_header *const hdr = find_symbol(table, name); ++ struct symbol *sym; ++ ++ if (hdr != NULL) { ++ for (sym = hdr->symbols; sym != NULL; sym = sym->next_with_same_name) { ++ assert(sym->hdr == hdr); ++ ++ if ((name_space == -1) || (sym->name_space == name_space)) { ++ assert(sym->depth <= table->depth); ++ return sym->depth - table->depth; ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++ + void * + _mesa_symbol_table_find_symbol(struct _mesa_symbol_table *table, + int name_space, const char *name) + { + struct symbol_header *const hdr = find_symbol(table, name); + + if (hdr != NULL) { + struct symbol *sym; + + + for (sym = hdr->symbols; sym != NULL; sym = sym->next_with_same_name) { + assert(sym->hdr == hdr); + + if ((name_space == -1) || (sym->name_space == name_space)) { + return sym->data; + } + } + } + + return NULL; + } + + + int + _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table, + int name_space, const char *name, + void *declaration) + { + struct symbol_header *hdr; + struct symbol *sym; + + check_symbol_table(table); + + hdr = find_symbol(table, name); + + check_symbol_table(table); + + if (hdr == NULL) { + hdr = calloc(1, sizeof(*hdr)); + hdr->name = name; + + hash_table_insert(table->ht, hdr, name); + hdr->next = table->hdr; + table->hdr = hdr; + } + + check_symbol_table(table); + ++ /* If the symbol already exists in this namespace at this scope, it cannot ++ * be added to the table. 
++ */ ++ for (sym = hdr->symbols ++ ; (sym != NULL) && (sym->name_space != name_space) ++ ; sym = sym->next_with_same_name) { ++ /* empty */ ++ } ++ ++ if (sym && (sym->depth == table->depth)) ++ return -1; ++ + sym = calloc(1, sizeof(*sym)); + sym->next_with_same_name = hdr->symbols; + sym->next_with_same_scope = table->current_scope->symbols; + sym->hdr = hdr; + sym->name_space = name_space; + sym->data = declaration; ++ sym->depth = table->depth; + + assert(sym->hdr == hdr); + + hdr->symbols = sym; + table->current_scope->symbols = sym; + + check_symbol_table(table); + return 0; + } + + + struct _mesa_symbol_table * + _mesa_symbol_table_ctor(void) + { + struct _mesa_symbol_table *table = calloc(1, sizeof(*table)); + + if (table != NULL) { + table->ht = hash_table_ctor(32, hash_table_string_hash, + hash_table_string_compare); + + _mesa_symbol_table_push_scope(table); + } + + return table; + } + + + void + _mesa_symbol_table_dtor(struct _mesa_symbol_table *table) + { + struct symbol_header *hdr; + struct symbol_header *next; + + while (table->current_scope != NULL) { + _mesa_symbol_table_pop_scope(table); + } + + for (hdr = table->hdr; hdr != NULL; hdr = next) { + next = hdr->next; + free(hdr); + } + + hash_table_dtor(table->ht); + free(table); + } diff --cc src/mesa/program/symbol_table.h index 00000000000,0c054ef1396..1d570fc1a09 mode 000000,100644..100644 --- a/src/mesa/program/symbol_table.h +++ b/src/mesa/program/symbol_table.h @@@ -1,0 -1,55 +1,58 @@@ + /* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #ifndef MESA_SYMBOL_TABLE_H + #define MESA_SYMBOL_TABLE_H + + struct _mesa_symbol_table; + struct _mesa_symbol_table_iterator; + + extern void _mesa_symbol_table_push_scope(struct _mesa_symbol_table *table); + + extern void _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table); + + extern int _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *symtab, + int name_space, const char *name, void *declaration); + ++extern int _mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table, ++ int name_space, const char *name); ++ + extern void *_mesa_symbol_table_find_symbol( + struct _mesa_symbol_table *symtab, int name_space, const char *name); + + extern struct _mesa_symbol_table *_mesa_symbol_table_ctor(void); + + extern void _mesa_symbol_table_dtor(struct _mesa_symbol_table *); + + extern struct _mesa_symbol_table_iterator *_mesa_symbol_table_iterator_ctor( + struct _mesa_symbol_table *table, int name_space, const char *name); + + extern void _mesa_symbol_table_iterator_dtor( + struct _mesa_symbol_table_iterator *); + + extern void *_mesa_symbol_table_iterator_get( + struct _mesa_symbol_table_iterator *iter); + + extern int _mesa_symbol_table_iterator_next( + struct _mesa_symbol_table_iterator *iter); + + #endif /* MESA_SYMBOL_TABLE_H */ diff --cc src/mesa/sources.mak index 
117b3f3d2b9,f01b60c4fc8..373f1b50d05 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@@ -223,56 -228,48 +228,51 @@@ STATETRACKER_SOURCES = state_tracker/st_program.c \ state_tracker/st_texture.c - SHADER_SOURCES = \ - shader/arbprogparse.c \ - shader/arbprogram.c \ - shader/atifragshader.c \ - shader/hash_table.c \ - shader/lex.yy.c \ - shader/nvfragparse.c \ - shader/nvprogram.c \ - shader/nvvertparse.c \ - shader/program.c \ - shader/program_parse.tab.c \ - shader/program_parse_extra.c \ - shader/prog_cache.c \ - shader/prog_execute.c \ - shader/prog_instruction.c \ - shader/prog_noise.c \ - shader/prog_optimize.c \ - shader/prog_parameter.c \ - shader/prog_parameter_layout.c \ - shader/prog_print.c \ - shader/prog_statevars.c \ - shader/prog_uniform.c \ - shader/programopt.c \ - shader/symbol_table.c \ - shader/shader_api.c \ - shader/uniforms.c + PROGRAM_SOURCES = \ + program/arbprogparse.c \ + program/hash_table.c \ + program/lex.yy.c \ + program/nvfragparse.c \ + program/nvvertparse.c \ + program/program.c \ + program/program_parse.tab.c \ + program/program_parse_extra.c \ + program/prog_cache.c \ + program/prog_execute.c \ + program/prog_instruction.c \ + program/prog_noise.c \ + program/prog_optimize.c \ + program/prog_parameter.c \ + program/prog_parameter_layout.c \ + program/prog_print.c \ + program/prog_statevars.c \ + program/prog_uniform.c \ + program/programopt.c \ + program/symbol_table.c +SHADER_CXX_SOURCES = \ - shader/ir_to_mesa.cpp ++ program/ir_to_mesa.cpp + SLANG_SOURCES = \ - shader/slang/slang_builtin.c \ - shader/slang/slang_codegen.c \ - shader/slang/slang_compile.c \ - shader/slang/slang_compile_function.c \ - shader/slang/slang_compile_operation.c \ - shader/slang/slang_compile_struct.c \ - shader/slang/slang_compile_variable.c \ - shader/slang/slang_emit.c \ - shader/slang/slang_ir.c \ - shader/slang/slang_label.c \ - shader/slang/slang_link.c \ - shader/slang/slang_log.c \ - shader/slang/slang_mem.c \ - 
shader/slang/slang_print.c \ - shader/slang/slang_simplify.c \ - shader/slang/slang_storage.c \ - shader/slang/slang_typeinfo.c \ - shader/slang/slang_vartable.c \ - shader/slang/slang_utility.c + slang/slang_builtin.c \ + slang/slang_codegen.c \ + slang/slang_compile.c \ + slang/slang_compile_function.c \ + slang/slang_compile_operation.c \ + slang/slang_compile_struct.c \ + slang/slang_compile_variable.c \ + slang/slang_emit.c \ + slang/slang_ir.c \ + slang/slang_label.c \ + slang/slang_link.c \ + slang/slang_log.c \ + slang/slang_mem.c \ + slang/slang_print.c \ + slang/slang_simplify.c \ + slang/slang_storage.c \ + slang/slang_typeinfo.c \ + slang/slang_vartable.c \ + slang/slang_utility.c ASM_C_SOURCES = \ x86/common_x86.c \ @@@ -338,12 -333,10 +338,12 @@@ MESA_GALLIUM_SOURCES = $(MATH_SOURCES) \ $(VBO_SOURCES) \ $(STATETRACKER_SOURCES) \ - $(SHADER_SOURCES) \ + $(PROGRAM_SOURCES) \ ppc/common_ppc.c \ - x86/common_x86.c \ - $(SLANG_SOURCES) + x86/common_x86.c + +MESA_GALLIUM_CXX_SOURCES = \ + $(SHADER_CXX_SOURCES) # All the core C sources, for dependency checking ALL_SOURCES = \