include $(TOP)/configs/current
-SUBDIRS = pp cl apps
+LIBNAME = glsl
-default install clean:
- @for dir in $(SUBDIRS) ; do \
- if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE) $@) || exit 1; \
- fi \
- done
+LIBGLCPP_SOURCES = \
+ glcpp/glcpp-lex.c \
+ glcpp/glcpp-parse.c \
+ glcpp/pp.c \
+ glcpp/xtalloc.c
- ../mesa/shader/hash_table.c \
- ../mesa/shader/symbol_table.c
+GLCPP_SOURCES = \
+ $(LIBGLCPP_SOURCES) \
+ glcpp/glcpp.c
+
+C_SOURCES = \
+ $(LIBGLCPP_SOURCES)
+
+CXX_SOURCES = \
+ ast_expr.cpp \
+ ast_function.cpp \
+ ast_to_hir.cpp \
+ ast_type.cpp \
+ builtin_function.cpp \
+ glsl_lexer.cpp \
+ glsl_parser.cpp \
+ glsl_parser_extras.cpp \
+ glsl_types.cpp \
+ hir_field_selection.cpp \
+ ir_basic_block.cpp \
+ ir_clone.cpp \
+ ir_constant_expression.cpp \
+ ir_constant_folding.cpp \
+ ir_constant_variable.cpp \
+ ir_copy_propagation.cpp \
+ ir.cpp \
+ ir_dead_code.cpp \
+ ir_dead_code_local.cpp \
+ ir_div_to_mul_rcp.cpp \
+ ir_expression_flattening.cpp \
+ ir_function_can_inline.cpp \
+ ir_function.cpp \
+ ir_function_inlining.cpp \
+ ir_hierarchical_visitor.cpp \
+ ir_hv_accept.cpp \
+ ir_if_return.cpp \
+ ir_if_simplification.cpp \
+ ir_if_to_cond_assign.cpp \
+ ir_import_prototypes.cpp \
+ ir_mat_op_to_vec.cpp \
+ ir_mod_to_fract.cpp \
+ ir_print_visitor.cpp \
+ ir_reader.cpp \
+ ir_swizzle_swizzle.cpp \
+ ir_validate.cpp \
+ ir_variable.cpp \
+ ir_vec_index_to_cond_assign.cpp \
+ ir_vec_index_to_swizzle.cpp \
+ linker.cpp \
+ link_functions.cpp \
+ s_expression.cpp
+
+LIBS = \
+ $(TOP)/src/glsl/libglsl.a \
+ $(shell pkg-config --libs talloc)
+
+APPS = glsl_compiler glcpp/glcpp
+
+GLSL2_C_SOURCES = \
- ../mesa/shader/hash_table.o
++ ../mesa/program/hash_table.c \
++ ../mesa/program/symbol_table.c
+GLSL2_CXX_SOURCES = \
+ main.cpp
+
+GLSL2_OBJECTS = \
+ $(GLSL2_C_SOURCES:.c=.o) \
+ $(GLSL2_CXX_SOURCES:.cpp=.o)
+
+### Basic defines ###
+
+DEFINES = \
+ $(LIBRARY_DEFINES) \
+ $(API_DEFINES)
+
+GLCPP_OBJECTS = \
+ $(GLCPP_SOURCES:.c=.o) \
- -I../mesa/shader \
++ ../mesa/program/hash_table.o
+
+OBJECTS = \
+ $(C_SOURCES:.c=.o) \
+ $(CXX_SOURCES:.cpp=.o)
+
+INCLUDES = \
+ -I. \
+ -I../mesa \
+ -I../mapi \
++ -I../mesa/program \
+ -I../../include \
+ $(LIBRARY_INCLUDES)
+
+ALL_SOURCES = \
+ $(C_SOURCES) \
+ $(CXX_SOURCES) \
+ $(GLSL2_CXX_SOURCES) \
+ $(GLSL2_C_SOURCES)
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a $(APPS)
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/src/glsl/Makefile.template
+ $(MKLIB) -cplusplus -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(ALL_SOURCES) Makefile
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
+
+# Remove object files, the static library, dependency files and the built apps
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+ -rm -f $(APPS)
+
+# Dummy target: nothing from this directory is installed
+install:
+ @echo -n ""
+
+
+##### RULES #####
+
+glsl_compiler: $(GLSL2_OBJECTS) libglsl.a
+ $(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) $(LIBS) -o $@
+
+glcpp/glcpp: $(GLCPP_OBJECTS) libglsl.a
+ $(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) $(LIBS) -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+glsl_lexer.cpp: glsl_lexer.lpp
+ flex --never-interactive --outfile="$@" $<
+
+glsl_parser.cpp: glsl_parser.ypp
+ bison -v -o "$@" --defines=glsl_parser.h $<
+
+glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
+ flex --never-interactive --outfile="$@" $<
+
+glcpp/glcpp-parse.c: glcpp/glcpp-parse.y
+ bison -v -o "$@" --defines=glcpp/glcpp-parse.h $<
+
+builtin_function.cpp: builtins/*/*
+ ./builtins/tools/generate_builtins.pl > builtin_function.cpp
+
+-include depend
--- /dev/null
- #include "symbol_table.h"
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef GLSL_SYMBOL_TABLE
+#define GLSL_SYMBOL_TABLE
+
+#include <new>
+
+extern "C" {
++#include "program/symbol_table.h"
+}
+#include "ir.h"
+#include "glsl_types.h"
+
+/**
+ * Facade class for _mesa_symbol_table
+ *
+ * Wraps the existing \c _mesa_symbol_table data structure to enforce some
+ * type safety and some symbol table invariants.  Variables, types, and
+ * functions are each stored in their own name space of the wrapped table.
+ */
+class glsl_symbol_table {
+private:
+   /** Name spaces used to keep the three kinds of symbols apart. */
+   enum glsl_symbol_name_space {
+      glsl_variable_name_space = 0,
+      glsl_type_name_space = 1,
+      glsl_function_name_space = 2
+   };
+
+   /** Destructor callback that \c operator new registers with talloc. */
+   static int
+   _glsl_symbol_table_destructor (glsl_symbol_table *table)
+   {
+      table->~glsl_symbol_table();
+      return 0;
+   }
+
+   /**
+    * Store \c data under \c name in name space \c ns
+    *
+    * \return
+    * \c true when \c _mesa_symbol_table_add_symbol returns 0.
+    */
+   bool insert_in_name_space(glsl_symbol_name_space ns, const char *name,
+                             void *data)
+   {
+      return _mesa_symbol_table_add_symbol(this->table, ns, name, data) == 0;
+   }
+
+public:
+   /* Callers of this talloc-based new need not call delete. It's
+    * easier to just talloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      typedef int (*destructor_fn)(void *);
+
+      void *mem = talloc_size(ctx, size);
+      assert(mem != NULL);
+
+      /* Have talloc run the C++ destructor when the memory is released. */
+      talloc_set_destructor(mem, (destructor_fn) _glsl_symbol_table_destructor);
+
+      return mem;
+   }
+
+   /* If the user *does* call delete, that's OK, we will just
+    * talloc_free in that case. Here, C++ will have already called the
+    * destructor so tell talloc not to do that again. */
+   static void operator delete(void *table)
+   {
+      talloc_set_destructor(table, NULL);
+      talloc_free(table);
+   }
+
+   /** Create the wrapped \c _mesa_symbol_table. */
+   glsl_symbol_table()
+      : table(_mesa_symbol_table_ctor())
+   {
+      /* empty */
+   }
+
+   /** Destroy the wrapped \c _mesa_symbol_table. */
+   ~glsl_symbol_table()
+   {
+      _mesa_symbol_table_dtor(this->table);
+   }
+
+   /** Forward to \c _mesa_symbol_table_push_scope. */
+   void push_scope()
+   {
+      _mesa_symbol_table_push_scope(this->table);
+   }
+
+   /** Forward to \c _mesa_symbol_table_pop_scope. */
+   void pop_scope()
+   {
+      _mesa_symbol_table_pop_scope(this->table);
+   }
+
+   /**
+    * Determine whether a name was declared at the current scope
+    *
+    * \return
+    * \c true when \c _mesa_symbol_table_symbol_scope reports 0 for \c name.
+    */
+   bool name_declared_this_scope(const char *name)
+   {
+      const int scope = _mesa_symbol_table_symbol_scope(this->table, -1, name);
+
+      return scope == 0;
+   }
+
+   /**
+    * \name Methods to add symbols to the table
+    *
+    * There is some temptation to rename all these functions to \c add_symbol
+    * or similar.  However, this breaks symmetry with the getter functions and
+    * reduces the clarity of the intention of code that uses these methods.
+    *
+    * Each method returns \c true when the underlying add call returns 0.
+    */
+   /*@{*/
+   bool add_variable(const char *name, ir_variable *v)
+   {
+      return insert_in_name_space(glsl_variable_name_space, name, v);
+   }
+
+   bool add_type(const char *name, const glsl_type *t)
+   {
+      /* The wrapped table stores plain \c void pointers, so const is cast
+       * away here.
+       */
+      return insert_in_name_space(glsl_type_name_space, name, (void *) t);
+   }
+
+   bool add_function(const char *name, ir_function *f)
+   {
+      return insert_in_name_space(glsl_function_name_space, name, f);
+   }
+   /*@}*/
+
+   /**
+    * \name Methods to get symbols from the table
+    *
+    * Each method returns whatever \c _mesa_symbol_table_find_symbol yields
+    * for the matching name space, cast to the requested type.
+    */
+   /*@{*/
+   ir_variable *get_variable(const char *name)
+   {
+      return (ir_variable *)
+         _mesa_symbol_table_find_symbol(this->table,
+                                        glsl_variable_name_space, name);
+   }
+
+   glsl_type *get_type(const char *name)
+   {
+      return (glsl_type *)
+         _mesa_symbol_table_find_symbol(this->table,
+                                        glsl_type_name_space, name);
+   }
+
+   ir_function *get_function(const char *name)
+   {
+      return (ir_function *)
+         _mesa_symbol_table_find_symbol(this->table,
+                                        glsl_function_name_space, name);
+   }
+   /*@}*/
+
+private:
+   /** The wrapped C symbol table. */
+   struct _mesa_symbol_table *table;
+};
+
+#endif /* GLSL_SYMBOL_TABLE */
--- /dev/null
- #include "shader_api.h"
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file linker.cpp
+ * GLSL linker implementation
+ *
+ * Given a set of shaders that are to be linked to generate a final program,
+ * there are three distinct stages.
+ *
+ * In the first stage shaders are partitioned into groups based on the shader
+ * type. All shaders of a particular type (e.g., vertex shaders) are linked
+ * together.
+ *
+ * - Undefined references in each shader are resolved to definitions in
+ * another shader.
+ * - Types and qualifiers of uniforms, outputs, and global variables defined
+ * in multiple shaders with the same name are verified to be the same.
+ * - Initializers for uniforms and global variables defined
+ * in multiple shaders with the same name are verified to be the same.
+ *
+ * The result, in the terminology of the GLSL spec, is a set of shader
+ * executables for each processing unit.
+ *
+ * After the first stage is complete, a series of semantic checks are performed
+ * on each of the shader executables.
+ *
+ * - Each shader executable must define a \c main function.
+ * - Each vertex shader executable must write to \c gl_Position.
+ * - Each fragment shader executable must write to either \c gl_FragData or
+ * \c gl_FragColor.
+ *
+ * In the final stage individual shader executables are linked to create a
+ * complete executable.
+ *
+ * - Types of uniforms defined in multiple shader stages with the same name
+ * are verified to be the same.
+ * - Initializers for uniforms defined in multiple shader stages with the
+ * same name are verified to be the same.
+ * - Types and qualifiers of outputs defined in one stage are verified to
+ * be the same as the types and qualifiers of inputs defined with the same
+ * name in a later stage.
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+#include <cstdlib>
+#include <cstdio>
+#include <cstdarg>
+#include <climits>
+
+extern "C" {
+#include <talloc.h>
+}
+
+#include "main/mtypes.h"
+#include "main/macros.h"
++#include "main/shaderobj.h"
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "program.h"
+#include "hash_table.h"
+#include "linker.h"
+#include "ir_optimization.h"
+
+/**
+ * Visitor that determines whether or not a variable is ever written.
+ *
+ * The variable is identified by name.  The walk stops at the first
+ * assignment whose left-hand side refers to a variable with that name.
+ */
+class find_assignment_visitor : public ir_hierarchical_visitor {
+public:
+   find_assignment_visitor(const char *name)
+   {
+      this->name = name;
+      this->found = false;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_assignment *ir)
+   {
+      ir_variable *const written = ir->lhs->variable_referenced();
+
+      /* Not the variable being searched for: skip this assignment's
+       * children and carry on with the parent.
+       */
+      if (strcmp(this->name, written->name) != 0)
+         return visit_continue_with_parent;
+
+      this->found = true;
+      return visit_stop;
+   }
+
+   /** Report whether a matching assignment has been seen so far. */
+   bool variable_found()
+   {
+      return this->found;
+   }
+
+private:
+   const char *name;       /**< Find writes to a variable with this name. */
+   bool found;             /**< Was a write to the variable found? */
+};
+
+
+/**
+ * Append a formatted error message to a program's info log
+ *
+ * The text "error: " is appended first, followed by the printf-style message.
+ *
+ * \param prog  Program whose \c InfoLog is extended
+ * \param fmt   printf-style format string for the message
+ */
+void
+linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+
+   va_start(args, fmt);
+   prog->InfoLog = talloc_strdup_append(prog->InfoLog, "error: ");
+   prog->InfoLog = talloc_vasprintf_append(prog->InfoLog, fmt, args);
+   va_end(args);
+}
+
+
+/**
+ * Reset the locations of variables of one mode in a shader
+ *
+ * \param sh            Shader whose instruction list is scanned
+ * \param mode          Only variables with this mode are touched
+ * \param generic_base  Variables whose location is at or above this value
+ *                      have their location set to -1; lower locations are
+ *                      left alone
+ */
+void
+invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode,
+                              int generic_base)
+{
+   foreach_list(node, sh->ir) {
+      ir_instruction *const inst = (ir_instruction *) node;
+      ir_variable *const var = inst->as_variable();
+
+      if (var == NULL)
+         continue;
+
+      if (var->mode != (unsigned) mode)
+         continue;
+
+      /* Only assign locations for generic attributes / varyings / etc.
+       */
+      if (var->location < generic_base)
+         continue;
+
+      var->location = -1;
+   }
+}
+
+
+/**
+ * Determine the number of attribute slots required for a particular type
+ *
+ * This code is here because it implements the language rules of a specific
+ * GLSL version.  Since it's a property of the language and not a property of
+ * types in general, it doesn't really belong in glsl_type.
+ *
+ * \param t  Type to be measured
+ *
+ * \return
+ * One slot for anything that is neither an array nor a matrix, the column
+ * count for a matrix, and the element slot count times the array size for an
+ * array.
+ */
+unsigned
+count_attribute_slots(const glsl_type *t)
+{
+   /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+    *
+    *     "A scalar input counts the same amount against this limit as a vec4,
+    *     so applications may want to consider packing groups of four
+    *     unrelated float inputs together into a vector to better utilize the
+    *     capabilities of the underlying hardware. A matrix input will use up
+    *     multiple locations.  The number of locations used will equal the
+    *     number of columns in the matrix."
+    *
+    * The spec does not explicitly say how arrays are counted.  However, it
+    * should be safe to assume the total number of slots consumed by an array
+    * is the number of entries in the array multiplied by the number of slots
+    * consumed by a single element of the array.
+    */
+
+   if (t->is_array()) {
+      const unsigned per_element = count_attribute_slots(t->element_type());
+
+      return t->array_size() * per_element;
+   }
+
+   return t->is_matrix() ? t->matrix_columns : 1;
+}
+
+
+/**
+ * Verify that a vertex shader executable meets all semantic requirements
+ *
+ * \param prog    Program that receives the error message on failure
+ * \param shader  Vertex shader executable to be verified.  May be \c NULL,
+ *                in which case there is nothing to check.
+ *
+ * \return
+ * \c false if the shader never assigns to \c gl_Position, \c true otherwise.
+ */
+bool
+validate_vertex_shader_executable(struct gl_shader_program *prog,
+                                  struct gl_shader *shader)
+{
+   if (shader != NULL) {
+      find_assignment_visitor position_writes("gl_Position");
+
+      position_writes.run(shader->ir);
+      if (!position_writes.variable_found()) {
+         linker_error_printf(prog,
+                             "vertex shader does not write to `gl_Position'\n");
+         return false;
+      }
+   }
+
+   return true;
+}
+
+
+/**
+ * Verify that a fragment shader executable meets all semantic requirements
+ *
+ * \param prog    Program that receives the error message on failure
+ * \param shader  Fragment shader executable to be verified.  May be \c NULL,
+ *                in which case there is nothing to check.
+ *
+ * \return
+ * \c false if the shader assigns to both \c gl_FragColor and \c gl_FragData,
+ * \c true otherwise.
+ */
+bool
+validate_fragment_shader_executable(struct gl_shader_program *prog,
+                                    struct gl_shader *shader)
+{
+   if (shader == NULL)
+      return true;
+
+   find_assignment_visitor frag_color("gl_FragColor");
+   find_assignment_visitor frag_data("gl_FragData");
+
+   frag_color.run(shader->ir);
+   frag_data.run(shader->ir);
+
+   const bool writes_color = frag_color.variable_found();
+   const bool writes_data = frag_data.variable_found();
+
+   if (!(writes_color && writes_data))
+      return true;
+
+   linker_error_printf(prog,  "fragment shader writes to both "
+                       "`gl_FragColor' and `gl_FragData'\n");
+   return false;
+}
+
+
+/**
+ * Generate a string describing the mode of a variable
+ *
+ * \param var  Variable whose \c mode (and, for \c ir_var_auto, \c read_only
+ *             flag) selects the description
+ *
+ * \return
+ * A constant string for use in linker error messages.  Temporaries and
+ * unknown modes trip an assertion and yield "invalid variable".
+ */
+static const char *
+mode_string(const ir_variable *var)
+{
+   const char *description = "invalid variable";
+
+   switch (var->mode) {
+   case ir_var_auto:
+      description = (var->read_only) ? "global constant" : "global variable";
+      break;
+
+   case ir_var_uniform:
+      description = "uniform";
+      break;
+
+   case ir_var_in:
+      description = "shader input";
+      break;
+
+   case ir_var_out:
+      description = "shader output";
+      break;
+
+   case ir_var_inout:
+      description = "shader inout";
+      break;
+
+   case ir_var_temporary:
+   default:
+      assert(!"Should not get here.");
+      break;
+   }
+
+   return description;
+}
+
+
+/**
+ * Perform validation of global variables used across multiple shaders
+ *
+ * \param prog           Program that receives error messages
+ * \param shader_list    Shaders whose global variables are compared
+ * \param num_shaders    Number of entries in \c shader_list
+ * \param uniforms_only  When set, variables that are not uniforms are ignored
+ *
+ * \return
+ * \c false as soon as two same-named variables differ in type or in the
+ * value of their constant initializers, \c true otherwise.
+ */
+bool
+cross_validate_globals(struct gl_shader_program *prog,
+                       struct gl_shader **shader_list,
+                       unsigned num_shaders,
+                       bool uniforms_only)
+{
+   /* Examine all of the uniforms in all of the shaders and cross validate
+    * them.
+    */
+   glsl_symbol_table variables;
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+      foreach_list(node, shader_list[i]->ir) {
+         ir_variable *const var = ((ir_instruction *) node)->as_variable();
+
+         if (var == NULL)
+            continue;
+
+         if (uniforms_only && (var->mode != ir_var_uniform))
+            continue;
+
+         /* Don't cross validate temporaries that are at global scope.  These
+          * will eventually get pulled into the shaders 'main'.
+          */
+         if (var->mode == ir_var_temporary)
+            continue;
+
+         /* First time this name is seen: remember it and move on. */
+         ir_variable *const existing = variables.get_variable(var->name);
+         if (existing == NULL) {
+            variables.add_variable(var->name, var);
+            continue;
+         }
+
+         /* A global with this name has already been seen, so the new
+          * instance must have the same type.
+          */
+         if (var->type != existing->type) {
+            linker_error_printf(prog, "%s `%s' declared as type "
+                                "`%s' and type `%s'\n",
+                                mode_string(var),
+                                var->name, var->type->name,
+                                existing->type->name);
+            return false;
+         }
+
+         /* FINISHME: Handle non-constant initializers.
+          */
+         if (var->constant_value == NULL)
+            continue;
+
+         if (existing->constant_value == NULL) {
+            /* If the first-seen instance of a particular uniform did not
+             * have an initializer but a later instance does, copy the
+             * initializer to the version stored in the symbol table.
+             */
+            /* FINISHME: This is wrong.  The constant_value field should
+             * FINISHME: not be modified!  Imagine a case where a shader
+             * FINISHME: without an initializer is linked in two different
+             * FINISHME: programs with shaders that have differing
+             * FINISHME: initializers.  Linking with the first will
+             * FINISHME: modify the shader, and linking with the second
+             * FINISHME: will fail.
+             */
+            existing->constant_value = var->constant_value->clone(NULL);
+            continue;
+         }
+
+         /* Both instances have initializers; their values must agree. */
+         if (!var->constant_value->has_value(existing->constant_value)) {
+            linker_error_printf(prog, "initializers for %s "
+                                "`%s' have differing values\n",
+                                mode_string(var), var->name);
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+
+/**
+ * Perform validation of uniforms used across multiple shader stages
+ *
+ * Runs \c cross_validate_globals over the program's linked shaders with the
+ * check restricted to uniforms.
+ *
+ * \return
+ * The result of \c cross_validate_globals.
+ */
+bool
+cross_validate_uniforms(struct gl_shader_program *prog)
+{
+   const bool uniforms_only = true;
+
+   return cross_validate_globals(prog, prog->_LinkedShaders,
+                                 prog->_NumLinkedShaders, uniforms_only);
+}
+
+
+/**
+ * Validate that outputs from one stage match inputs of another
+ *
+ * Every input of \c consumer that has a same-named output in \c producer must
+ * agree with that output in type and in the centroid, invariant, and
+ * interpolation qualifiers.  Inputs without a matching output are not
+ * checked.
+ *
+ * \param prog      Program that receives error messages
+ * \param producer  Shader whose \c ir_var_out variables are collected
+ * \param consumer  Shader whose \c ir_var_in variables are checked
+ *
+ * \return
+ * \c false on the first mismatch (after logging it), \c true otherwise.
+ */
+bool
+cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
+                                 gl_shader *producer, gl_shader *consumer)
+{
+   glsl_symbol_table parameters;
+   /* FINISHME: Figure these out dynamically. */
+   const char *const producer_stage = "vertex";
+   const char *const consumer_stage = "fragment";
+
+   /* Find all shader outputs in the "producer" stage.
+    */
+   foreach_list(node, producer->ir) {
+      ir_variable *const var = ((ir_instruction *) node)->as_variable();
+
+      /* FINISHME: For geometry shaders, this should also look for inout
+       * FINISHME: variables.
+       */
+      if ((var == NULL) || (var->mode != ir_var_out))
+         continue;
+
+      parameters.add_variable(var->name, var);
+   }
+
+
+   /* Find all shader inputs in the "consumer" stage.  Any variables that have
+    * matching outputs already in the symbol table must have the same type and
+    * qualifiers.
+    */
+   foreach_list(node, consumer->ir) {
+      ir_variable *const input = ((ir_instruction *) node)->as_variable();
+
+      /* FINISHME: For geometry shaders, this should also look for inout
+       * FINISHME: variables.
+       */
+      if ((input == NULL) || (input->mode != ir_var_in))
+         continue;
+
+      ir_variable *const output = parameters.get_variable(input->name);
+      if (output != NULL) {
+         /* Check that the types match between stages.
+          */
+         if (input->type != output->type) {
+            linker_error_printf(prog,
+                                "%s shader output `%s' declared as "
+                                "type `%s', but %s shader input declared "
+                                "as type `%s'\n",
+                                producer_stage, output->name,
+                                output->type->name,
+                                consumer_stage, input->type->name);
+            return false;
+         }
+
+         /* Check that all of the qualifiers match between stages.
+          */
+         if (input->centroid != output->centroid) {
+            linker_error_printf(prog,
+                                "%s shader output `%s' %s centroid qualifier, "
+                                "but %s shader input %s centroid qualifier\n",
+                                producer_stage,
+                                output->name,
+                                (output->centroid) ? "has" : "lacks",
+                                consumer_stage,
+                                (input->centroid) ? "has" : "lacks");
+            return false;
+         }
+
+         if (input->invariant != output->invariant) {
+            linker_error_printf(prog,
+                                "%s shader output `%s' %s invariant qualifier, "
+                                "but %s shader input %s invariant qualifier\n",
+                                producer_stage,
+                                output->name,
+                                (output->invariant) ? "has" : "lacks",
+                                consumer_stage,
+                                (input->invariant) ? "has" : "lacks");
+            return false;
+         }
+
+         if (input->interpolation != output->interpolation) {
+            linker_error_printf(prog,
+                                "%s shader output `%s' specifies %s "
+                                "interpolation qualifier, "
+                                "but %s shader input specifies %s "
+                                "interpolation qualifier\n",
+                                producer_stage,
+                                output->name,
+                                output->interpolation_string(),
+                                consumer_stage,
+                                input->interpolation_string());
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+
+/**
+ * Populates a shader's symbol table with all global declarations
+ *
+ * A fresh \c glsl_symbol_table is allocated with \c sh as its talloc context,
+ * and every function and variable found at the top level of \c sh->ir is
+ * added to it.
+ *
+ * \param sh  Shader whose \c symbols field is (re)created
+ */
+static void
+populate_symbol_table(gl_shader *sh)
+{
+   sh->symbols = new(sh) glsl_symbol_table;
+
+   foreach_list(node, sh->ir) {
+      ir_instruction *const inst = (ir_instruction *) node;
+
+      ir_function *const func = inst->as_function();
+      if (func != NULL) {
+         sh->symbols->add_function(func->name, func);
+         continue;
+      }
+
+      ir_variable *const var = inst->as_variable();
+      if (var != NULL)
+         sh->symbols->add_variable(var->name, var);
+   }
+}
+
+
+/**
+ * Remap variables referenced in an instruction tree
+ *
+ * This is used when instruction trees are cloned from one shader and placed in
+ * another.  These trees will contain references to \c ir_variable nodes that
+ * do not exist in the target shader.  This function finds these \c ir_variable
+ * references and replaces the references with matching variables in the target
+ * shader.
+ *
+ * If there is no matching variable in the target shader, a clone of the
+ * \c ir_variable is made and added to the target shader.  The new variable is
+ * added to \b both the instruction stream and the symbol table.
+ *
+ * References to temporaries are not looked up by name.  They are replaced
+ * with the entry that \c temps holds for the referenced variable, and that
+ * entry must exist.
+ *
+ * \param inst          IR tree that is to be processed.
+ * \param symbols       Symbol table containing global scope symbols in the
+ *                      linked shader.
+ * \param instructions  Instruction stream where new variable declarations
+ *                      should be added.
+ * \param temps         Hash table consulted, keyed by the referenced
+ *                      variable, to find the replacement for a temporary.
+ */
+void
+remap_variables(ir_instruction *inst, glsl_symbol_table *symbols,
+                exec_list *instructions, hash_table *temps)
+{
+   class remap_visitor : public ir_hierarchical_visitor {
+   public:
+      remap_visitor(glsl_symbol_table *symbols, exec_list *instructions,
+                    hash_table *temps)
+         : symbols(symbols), instructions(instructions), temps(temps)
+      {
+         /* empty */
+      }
+
+      virtual ir_visitor_status visit(ir_dereference_variable *ir)
+      {
+         /* Temporaries are remapped through the hash table only. */
+         if (ir->var->mode == ir_var_temporary) {
+            ir_variable *const mapped =
+               (ir_variable *) hash_table_find(this->temps, ir->var);
+
+            assert(mapped != NULL);
+            ir->var = mapped;
+            return visit_continue;
+         }
+
+         /* Everything else is matched by name, cloning on a miss. */
+         ir_variable *replacement =
+            this->symbols->get_variable(ir->var->name);
+
+         if (replacement == NULL) {
+            replacement = ir->var->clone(NULL);
+
+            this->symbols->add_variable(replacement->name, replacement);
+            this->instructions->push_head(replacement);
+         }
+
+         ir->var = replacement;
+         return visit_continue;
+      }
+
+   private:
+      glsl_symbol_table *symbols;
+      exec_list *instructions;
+      hash_table *temps;
+   };
+
+   remap_visitor remapper(symbols, instructions, temps);
+
+   inst->accept(&remapper);
+}
+
+
+/**
+ * Move non-declarations from one instruction stream to another
+ *
+ * The intended usage pattern of this function is to pass the pointer to the
+ * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node
+ * pointer) for \c last and \c false for \c make_copies on the first
+ * call.  Successive calls pass the return value of the previous call for
+ * \c last and \c true for \c make_copies.
+ *
+ * Functions and non-temporary variable declarations are left where they are.
+ *
+ * \param instructions Source instruction stream
+ * \param last         Instruction after which new instructions should be
+ *                     inserted in the target instruction stream
+ * \param make_copies  Flag selecting whether instructions in \c instructions
+ *                     should be copied (via \c ir_instruction::clone) into the
+ *                     target list or moved.
+ * \param target       Shader whose symbol table and instruction list are
+ *                     handed to \c remap_variables for copied instructions.
+ *                     Only used when \c make_copies is set.
+ *
+ * \return
+ * The new "last" instruction in the target instruction stream.  This pointer
+ * is suitable for use as the \c last parameter of a later call to this
+ * function.
+ */
+exec_node *
+move_non_declarations(exec_list *instructions, exec_node *last,
+                      bool make_copies, gl_shader *target)
+{
+   /* Maps each original temporary to its clone; only needed when copying. */
+   hash_table *const temps = make_copies
+      ? hash_table_ctor(0, hash_table_pointer_hash,
+                        hash_table_pointer_compare)
+      : NULL;
+
+   foreach_list_safe(node, instructions) {
+      ir_instruction *inst = (ir_instruction *) node;
+
+      if (inst->as_function())
+         continue;
+
+      ir_variable *const var = inst->as_variable();
+      const bool is_temporary =
+         (var != NULL) && (var->mode == ir_var_temporary);
+
+      /* Declarations of anything but temporaries stay put. */
+      if ((var != NULL) && !is_temporary)
+         continue;
+
+      assert(inst->as_assignment() || is_temporary);
+
+      if (!make_copies) {
+         inst->remove();
+      } else {
+         inst = inst->clone(NULL);
+
+         if (var == NULL)
+            remap_variables(inst, target->symbols, target->ir, temps);
+         else
+            hash_table_insert(temps, inst, var);
+      }
+
+      last->insert_after(inst);
+      last = inst;
+   }
+
+   if (make_copies)
+      hash_table_dtor(temps);
+
+   return last;
+}
+
+/**
+ * Get the function signature for main from a shader
+ *
+ * \param sh  Shader whose symbol table is searched for \c main
+ *
+ * \return
+ * The defined signature of \c main that matches an empty parameter list, or
+ * \c NULL if the shader has no such definition.
+ */
+static ir_function_signature *
+get_main_function_signature(gl_shader *sh)
+{
+   ir_function *const f = sh->symbols->get_function("main");
+   if (f == NULL)
+      return NULL;
+
+   exec_list void_parameters;
+
+   /* Look for the 'void main()' signature and ensure that it's defined.
+    * This keeps the linker from accidentally picking a shader that just
+    * contains a prototype for main.
+    *
+    * We don't have to check for multiple definitions of main (in multiple
+    * shaders) because that would have already been caught above.
+    */
+   ir_function_signature *sig = f->matching_signature(&void_parameters);
+
+   return ((sig != NULL) && sig->is_defined) ? sig : NULL;
+}
+
+
+/**
+ * Combine a group of shaders for a single stage to generate a linked shader
+ *
+ * \param prog         Program being linked; receives error messages.
+ * \param shader_list  Shaders of a single stage that are to be combined.
+ * \param num_shaders  Number of entries in \c shader_list.  At least one
+ *                     shader is expected: element 0 is read when reporting a
+ *                     missing \c main.
+ *
+ * \return
+ * A newly created shader holding the combined IR, or \c NULL (after logging
+ * an error) if the globals are inconsistent, a function is defined more than
+ * once, or no shader defines \c main.
+ *
+ * \note
+ * If this function is supplied a single shader, it is cloned, and the new
+ * shader is returned.
+ */
+static struct gl_shader *
+link_intrastage_shaders(struct gl_shader_program *prog,
+                        struct gl_shader **shader_list,
+                        unsigned num_shaders)
+{
+   /* Check that global variables defined in multiple shaders are consistent.
+    */
+   if (!cross_validate_globals(prog, shader_list, num_shaders, false))
+      return NULL;
+
+   /* Check that there is only a single definition of each function signature
+    * across all shaders.
+    */
+   for (unsigned i = 0; i < (num_shaders - 1); i++) {
+      foreach_list(node, shader_list[i]->ir) {
+         ir_function *const f = ((ir_instruction *) node)->as_function();
+
+         if (f == NULL)
+            continue;
+
+         for (unsigned j = i + 1; j < num_shaders; j++) {
+            ir_function *const other =
+               shader_list[j]->symbols->get_function(f->name);
+
+            /* If the other shader has no function (and therefore no function
+             * signatures) with the same name, skip to the next shader.
+             */
+            if (other == NULL)
+               continue;
+
+            foreach_iter (exec_list_iterator, iter, *f) {
+               ir_function_signature *sig =
+                  (ir_function_signature *) iter.get();
+
+               if (!sig->is_defined || sig->is_built_in)
+                  continue;
+
+               ir_function_signature *other_sig =
+                  other->exact_matching_signature(& sig->parameters);
+
+               if ((other_sig != NULL) && other_sig->is_defined
+                   && !other_sig->is_built_in) {
+                  /* Terminate the message with a newline, like every other
+                   * linker error, so the next info-log entry starts on its
+                   * own line.
+                   */
+                  linker_error_printf(prog,
+                                      "function `%s' is multiply defined\n",
+                                      f->name);
+                  return NULL;
+               }
+            }
+         }
+      }
+   }
+
+   /* Find the shader that defines main, and make a clone of it.
+    *
+    * Starting with the clone, search for undefined references.  If one is
+    * found, find the shader that defines it.  Clone the reference and add
+    * it to the shader.  Repeat until there are no undefined references or
+    * until a reference cannot be resolved.
+    */
+   gl_shader *main = NULL;
+   for (unsigned i = 0; i < num_shaders; i++) {
+      if (get_main_function_signature(shader_list[i]) != NULL) {
+         main = shader_list[i];
+         break;
+      }
+   }
+
+   if (main == NULL) {
+      linker_error_printf(prog, "%s shader lacks `main'\n",
+                          (shader_list[0]->Type == GL_VERTEX_SHADER)
+                          ? "vertex" : "fragment");
+      return NULL;
+   }
+
+   gl_shader *const linked = _mesa_new_shader(NULL, 0, main->Type);
+   linked->ir = new(linked) exec_list;
+   clone_ir_list(linked->ir, main->ir);
+
+   populate_symbol_table(linked);
+
+   /* Get a pointer to the main function in the final linked shader (i.e., the
+    * copy of the original shader that contained the main function).
+    */
+   ir_function_signature *const main_sig = get_main_function_signature(linked);
+
+   /* Move any instructions other than variable declarations or function
+    * declarations into main.
+    */
+   exec_node *insertion_point =
+      move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false,
+                            linked);
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+      if (shader_list[i] == main)
+         continue;
+
+      insertion_point = move_non_declarations(shader_list[i]->ir,
+                                              insertion_point, true, linked);
+   }
+
+   /* Build the list of shaders handed to link_function_calls: the shaders
+    * being linked followed by the built-in shaders each of them asks for.
+    */
+   unsigned num_linking_shaders = num_shaders;
+   for (unsigned i = 0; i < num_shaders; i++)
+      num_linking_shaders += shader_list[i]->num_builtins_to_link;
+
+   gl_shader **linking_shaders =
+      (gl_shader **) calloc(num_linking_shaders, sizeof(gl_shader *));
+
+   memcpy(linking_shaders, shader_list,
+          sizeof(linking_shaders[0]) * num_shaders);
+
+   unsigned idx = num_shaders;
+   for (unsigned i = 0; i < num_shaders; i++) {
+      memcpy(&linking_shaders[idx], shader_list[i]->builtins_to_link,
+             sizeof(linking_shaders[0]) * shader_list[i]->num_builtins_to_link);
+      idx += shader_list[i]->num_builtins_to_link;
+   }
+
+   assert(idx == num_linking_shaders);
+
+   link_function_calls(prog, linked, linking_shaders, num_linking_shaders);
+
+   free(linking_shaders);
+
+   return linked;
+}
+
+
+/**
+ * Scratch record for one uniform name seen while assigning locations
+ *
+ * \c link must remain the first member: the list of these records is walked
+ * by casting \c exec_node pointers to \c uniform_node pointers.
+ */
+struct uniform_node {
+   exec_node link;         /**< Links the record into the list of uniforms. */
+   struct gl_uniform *u;   /**< Array of \c slots entries sharing one name. */
+   unsigned slots;         /**< Number of entries in \c u. */
+};
+
+/**
+ * Assign locations to the uniforms of every linked shader
+ *
+ * Within each linked shader, uniforms receive consecutive positions starting
+ * at zero, one position per group of four components.  Uniforms are merged
+ * by name across shaders, and the merged list is stored in
+ * \c prog->Uniforms.
+ *
+ * \param prog  Program whose linked shaders are processed
+ */
+void
+assign_uniform_locations(struct gl_shader_program *prog)
+{
+   /* Records are kept both in a list (to preserve first-seen order) and in a
+    * hash table keyed by name (to find a uniform already seen in an earlier
+    * shader).
+    */
+   exec_list uniforms;
+   unsigned total_uniforms = 0;
+   hash_table *ht = hash_table_ctor(32, hash_table_string_hash,
+                                    hash_table_string_compare);
+
+   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
+      gl_shader *const sh = prog->_LinkedShaders[i];
+      unsigned next_position = 0;
+
+      foreach_list(node, sh->ir) {
+         ir_variable *const var = ((ir_instruction *) node)->as_variable();
+
+         if (var == NULL)
+            continue;
+
+         if (var->mode != ir_var_uniform)
+            continue;
+
+         const unsigned vec4_slots = (var->component_slots() + 3) / 4;
+         assert(vec4_slots != 0);
+
+         uniform_node *n = (uniform_node *) hash_table_find(ht, var->name);
+         if (n == NULL) {
+            n = (uniform_node *) calloc(1, sizeof(struct uniform_node));
+            n->u = (gl_uniform *) calloc(vec4_slots, sizeof(struct gl_uniform));
+            n->slots = vec4_slots;
+
+            /* All slots of one uniform share a single copy of the name. */
+            n->u[0].Name = strdup(var->name);
+            for (unsigned j = 1; j < vec4_slots; j++)
+               n->u[j].Name = n->u[0].Name;
+
+            hash_table_insert(ht, n, n->u[0].Name);
+            uniforms.push_tail(& n->link);
+            total_uniforms += vec4_slots;
+         }
+
+         if (var->constant_value != NULL) {
+            for (unsigned j = 0; j < vec4_slots; j++)
+               n->u[j].Initialized = true;
+         }
+
+         var->location = next_position;
+
+         for (unsigned j = 0; j < vec4_slots; j++) {
+            const unsigned position = next_position++;
+
+            switch (sh->Type) {
+            case GL_VERTEX_SHADER:
+               n->u[j].VertPos = position;
+               break;
+            case GL_FRAGMENT_SHADER:
+               n->u[j].FragPos = position;
+               break;
+            case GL_GEOMETRY_SHADER:
+               /* FINISHME: Support geometry shaders. */
+               assert(sh->Type != GL_GEOMETRY_SHADER);
+               break;
+            }
+         }
+      }
+   }
+
+   gl_uniform_list *ul = (gl_uniform_list *)
+      calloc(1, sizeof(gl_uniform_list));
+
+   ul->Size = total_uniforms;
+   ul->NumUniforms = total_uniforms;
+   ul->Uniforms = (gl_uniform *) calloc(total_uniforms, sizeof(gl_uniform));
+
+   /* Drain the list in order, copying each record's slots into the final
+    * array and releasing the scratch storage.
+    */
+   unsigned idx = 0;
+   uniform_node *cur = (uniform_node *) uniforms.head;
+   while (cur->link.next != NULL) {
+      uniform_node *const following = (uniform_node *) cur->link.next;
+
+      cur->link.remove();
+      memcpy(&ul->Uniforms[idx], cur->u, sizeof(gl_uniform) * cur->slots);
+      idx += cur->slots;
+
+      free(cur->u);
+      free(cur);
+
+      cur = following;
+   }
+
+   hash_table_dtor(ht);
+
+   prog->Uniforms = ul;
+}
+
+
+/**
+ * Find a contiguous set of available bits in a bitmask
+ *
+ * \param used_mask     Bits representing used (1) and unused (0) locations
+ * \param needed_count  Number of contiguous bits needed.
+ *
+ * \return
+ * Base location of the available bits on success or -1 on failure.  A
+ * request for zero bits, or for more bits than \c used_mask can hold, always
+ * fails.
+ */
+int
+find_available_slots(unsigned used_mask, unsigned needed_count)
+{
+   const unsigned total_bits = 8 * sizeof(used_mask);
+
+   /* Validate the request before building the mask: shifting by the full
+    * width of the type (or more) is undefined, so the mask must not be
+    * computed for an out-of-range count.
+    */
+   if ((needed_count == 0) || (needed_count > total_bits))
+      return -1;
+
+   /* A request for every bit cannot be expressed as (1 << n) - 1. */
+   unsigned needed_mask = (needed_count == total_bits)
+      ? ~0u : ((1u << needed_count) - 1);
+
+   /* Number of distinct base positions at which the run still fits. */
+   const unsigned candidates = total_bits - needed_count + 1;
+
+   for (unsigned i = 0; i < candidates; i++) {
+      if ((needed_mask & ~used_mask) == needed_mask)
+         return (int) i;
+
+      needed_mask <<= 1;
+   }
+
+   return -1;
+}
+
+
+/**
+ * Assign generic vertex attribute locations to the vertex shader's inputs.
+ *
+ * \param prog                 Program being linked.  Its first linked shader
+ *                             must be the vertex shader.
+ * \param max_attribute_index  Number of usable generic attribute locations;
+ *                             every location at or above this index is
+ *                             treated as already taken.
+ *
+ * \return
+ * \c true on success.  \c false, after reporting the problem through
+ * \c linker_error_printf, when an input cannot be given enough contiguous
+ * locations.
+ */
+bool
+assign_attribute_locations(gl_shader_program *prog, unsigned max_attribute_index)
+{
+   /* Mark invalid attribute locations as being used.
+    */
+   unsigned used_locations = (max_attribute_index >= 32)
+      ? ~0 : ~((1 << max_attribute_index) - 1);
+
+   gl_shader *const sh = prog->_LinkedShaders[0];
+   assert(sh->Type == GL_VERTEX_SHADER);
+
+   /* Operate in a total of four passes.
+    *
+    * 1. Invalidate the location assignments for all vertex shader inputs.
+    *
+    * 2. Assign locations for inputs that have user-defined (via
+    *    glBindVertexAttribLocation) locations.
+    *
+    * 3. Sort the attributes without assigned locations by number of slots
+    *    required in decreasing order.  Fragmentation caused by attribute
+    *    locations assigned by the application may prevent large attributes
+    *    from having enough contiguous space.
+    *
+    * 4. Assign locations to any inputs without assigned locations.
+    */
+
+   invalidate_variable_locations(sh, ir_var_in, VERT_ATTRIB_GENERIC0);
+
+   if (prog->Attributes != NULL) {
+      for (unsigned i = 0; i < prog->Attributes->NumParameters; i++) {
+         ir_variable *const var =
+            sh->symbols->get_variable(prog->Attributes->Parameters[i].Name);
+
+         /* Note: attributes that occupy multiple slots, such as arrays or
+          * matrices, may appear in the attrib array multiple times.
+          */
+         if ((var == NULL) || (var->location != -1))
+            continue;
+
+         /* From page 61 of the OpenGL 4.0 spec:
+          *
+          *     "LinkProgram will fail if the attribute bindings assigned by
+          *     BindAttribLocation do not leave not enough space to assign a
+          *     location for an active matrix attribute or an active attribute
+          *     array, both of which require multiple contiguous generic
+          *     attributes."
+          *
+          * Previous versions of the spec contain similar language but omit the
+          * bit about attribute arrays.
+          *
+          * Page 61 of the OpenGL 4.0 spec also says:
+          *
+          *     "It is possible for an application to bind more than one
+          *     attribute name to the same location. This is referred to as
+          *     aliasing. This will only work if only one of the aliased
+          *     attributes is active in the executable program, or if no path
+          *     through the shader consumes more than one attribute of a set
+          *     of attributes aliased to the same location. A link error can
+          *     occur if the linker determines that every path through the
+          *     shader consumes multiple aliased attributes, but
+          *     implementations are not required to generate an error in this
+          *     case."
+          *
+          * These two paragraphs are either somewhat contradictory, or I don't
+          * fully understand one or both of them.
+          */
+         /* FINISHME: The code as currently written does not support attribute
+          * FINISHME: location aliasing (see comment above).
+          */
+         const int attr = prog->Attributes->Parameters[i].StateIndexes[0];
+         const unsigned slots = count_attribute_slots(var->type);
+
+         /* Mask representing the contiguous slots that will be used by this
+          * attribute.
+          */
+         const unsigned use_mask = (1 << slots) - 1;
+
+         /* Generate a link error if the set of bits requested for this
+          * attribute overlaps any previously allocated bits.
+          */
+         if ((~(use_mask << attr) & used_locations) != used_locations) {
+            linker_error_printf(prog,
+                                "insufficient contiguous attribute locations "
+                                "available for vertex shader input `%s'",
+                                var->name);
+            return false;
+         }
+
+         var->location = VERT_ATTRIB_GENERIC0 + attr;
+         used_locations |= (use_mask << attr);
+      }
+   }
+
+   /* Temporary storage for the set of attributes that need locations assigned.
+    *
+    * used_locations is a 32-bit mask and every attribute needs at least one
+    * bit of it, so more than 32 pending attributes can never all be placed.
+    * One entry per bit is therefore enough, provided the fill loop below
+    * refuses to go past the end.
+    */
+   struct temp_attr {
+      unsigned slots;
+      ir_variable *var;
+
+      /* Used below in the call to qsort. */
+      static int compare(const void *a, const void *b)
+      {
+         const temp_attr *const l = (const temp_attr *) a;
+         const temp_attr *const r = (const temp_attr *) b;
+
+         /* Reversed because we want a descending order sort below. */
+         return r->slots - l->slots;
+      }
+   } to_assign[32];
+   const unsigned max_to_assign = sizeof(to_assign) / sizeof(to_assign[0]);
+
+   unsigned num_attr = 0;
+
+   foreach_list(node, sh->ir) {
+      ir_variable *const var = ((ir_instruction *) node)->as_variable();
+
+      if ((var == NULL) || (var->mode != ir_var_in))
+         continue;
+
+      /* The location was explicitly assigned, nothing to do here.
+       */
+      if (var->location != -1)
+         continue;
+
+      /* Never write past the end of to_assign.  A shader with this many
+       * unplaced inputs cannot be satisfied anyway, so report it as the
+       * same link error instead of corrupting the stack.
+       */
+      if (num_attr >= max_to_assign) {
+         linker_error_printf(prog,
+                             "insufficient contiguous attribute locations "
+                             "available for vertex shader input `%s'",
+                             var->name);
+         return false;
+      }
+
+      to_assign[num_attr].slots = count_attribute_slots(var->type);
+      to_assign[num_attr].var = var;
+      num_attr++;
+   }
+
+   /* If all of the attributes were assigned locations by the application (or
+    * are built-in attributes with fixed locations), return early.  This should
+    * be the common case.
+    */
+   if (num_attr == 0)
+      return true;
+
+   qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);
+
+   /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can only
+    * be explicitly assigned via glBindAttribLocation.  Mark it as reserved
+    * to prevent it from being automatically allocated below.
+    */
+   used_locations |= (1 << 0);
+
+   for (unsigned i = 0; i < num_attr; i++) {
+      /* Mask representing the contiguous slots that will be used by this
+       * attribute.
+       */
+      const unsigned use_mask = (1 << to_assign[i].slots) - 1;
+
+      int location = find_available_slots(used_locations, to_assign[i].slots);
+
+      if (location < 0) {
+         linker_error_printf(prog,
+                             "insufficient contiguous attribute locations "
+                             "available for vertex shader input `%s'",
+                             to_assign[i].var->name);
+         return false;
+      }
+
+      to_assign[i].var->location = VERT_ATTRIB_GENERIC0 + location;
+      used_locations |= (use_mask << location);
+   }
+
+   return true;
+}
+
+
+/**
+ * Pair up the outputs of one stage with the same-named inputs of the next
+ * stage and give each matched pair a location.
+ *
+ * \param producer  Shader whose \c ir_var_out variables are examined.
+ * \param consumer  Shader whose \c ir_var_in variables are examined.
+ */
+void
+assign_varying_locations(gl_shader *producer, gl_shader *consumer)
+{
+   /* FINISHME: Set dynamically when geometry shader support is added. */
+   unsigned next_output = VERT_RESULT_VAR0;
+   unsigned next_input = FRAG_ATTRIB_VAR0;
+
+   /* Three passes follow:
+    *
+    * 1. Hand out locations to every output that has a matching input.
+    *
+    * 2. Demote producer outputs that ended up without a location so that
+    *    the optimizer can eliminate them.
+    *
+    * 3. Flag consumer inputs according to whether they got a location, so
+    *    that the optimizer can eliminate the ones that did not.
+    */
+
+   invalidate_variable_locations(producer, ir_var_out, VERT_RESULT_VAR0);
+   invalidate_variable_locations(consumer, ir_var_in, FRAG_ATTRIB_VAR0);
+
+   /* Pass 1: match outputs to inputs by name. */
+   foreach_list(node, producer->ir) {
+      ir_variable *const out = ((ir_instruction *) node)->as_variable();
+
+      if (out == NULL)
+         continue;
+
+      if ((out->mode != ir_var_out) || (out->location != -1))
+         continue;
+
+      ir_variable *const in = consumer->symbols->get_variable(out->name);
+
+      if (in == NULL)
+         continue;
+
+      if (in->mode != ir_var_in)
+         continue;
+
+      assert(in->location == -1);
+
+      /* FINISHME: Location assignment will need some changes when arrays,
+       * FINISHME: matrices, and structures are allowed as shader inputs /
+       * FINISHME: outputs.
+       */
+      out->location = next_output;
+      in->location = next_input;
+
+      next_output++;
+      next_input++;
+   }
+
+   /* Pass 2: an 'out' variable only counts as a shader output when the
+    * following stage reads it.
+    */
+   foreach_list(node, producer->ir) {
+      ir_variable *const out = ((ir_instruction *) node)->as_variable();
+
+      if ((out == NULL) || (out->mode != ir_var_out))
+         continue;
+
+      if (out->location != -1)
+         continue;
+
+      out->shader_out = false;
+      out->mode = ir_var_auto;
+   }
+
+   /* Pass 3: an 'in' variable only counts as a shader input when the
+    * previous stage writes it.
+    */
+   foreach_list(node, consumer->ir) {
+      ir_variable *const in = ((ir_instruction *) node)->as_variable();
+
+      if ((in == NULL) || (in->mode != ir_var_in))
+         continue;
+
+      if (in->location != -1)
+         in->shader_in = true;
+      else
+         in->shader_in = false;
+   }
+}
+
+
+/**
+ * Link the shaders attached to a program.
+ *
+ * The shaders are grouped by stage, each stage is linked and validated on
+ * its own, the stages are cross-validated against each other, the linked IR
+ * is lowered and optimized, and finally uniform, attribute and varying
+ * locations are assigned.
+ *
+ * \param prog  Program to link.  \c LinkStatus is true on return only if
+ *              every step succeeded; messages are accumulated in
+ *              \c InfoLog.
+ */
+void
+link_shaders(struct gl_shader_program *prog)
+{
+   prog->LinkStatus = false;
+   prog->Validated = false;
+   prog->_Used = false;
+
+   if (prog->InfoLog != NULL)
+      talloc_free(prog->InfoLog);
+
+   prog->InfoLog = talloc_strdup(NULL, "");
+
+   /* Separate the shaders into groups based on their type.
+    */
+   struct gl_shader **vert_shader_list;
+   unsigned num_vert_shaders = 0;
+   struct gl_shader **frag_shader_list;
+   unsigned num_frag_shaders = 0;
+
+   /* One allocation holds both lists: the fragment list starts halfway in. */
+   vert_shader_list = (struct gl_shader **)
+      calloc(2 * prog->NumShaders, sizeof(struct gl_shader *));
+   frag_shader_list =  &vert_shader_list[prog->NumShaders];
+
+   unsigned min_version = UINT_MAX;
+   unsigned max_version = 0;
+   for (unsigned i = 0; i < prog->NumShaders; i++) {
+      min_version = MIN2(min_version, prog->Shaders[i]->Version);
+      max_version = MAX2(max_version, prog->Shaders[i]->Version);
+
+      switch (prog->Shaders[i]->Type) {
+      case GL_VERTEX_SHADER:
+         vert_shader_list[num_vert_shaders] = prog->Shaders[i];
+         num_vert_shaders++;
+         break;
+      case GL_FRAGMENT_SHADER:
+         frag_shader_list[num_frag_shaders] = prog->Shaders[i];
+         num_frag_shaders++;
+         break;
+      case GL_GEOMETRY_SHADER:
+         /* FINISHME: Support geometry shaders. */
+         assert(prog->Shaders[i]->Type != GL_GEOMETRY_SHADER);
+         break;
+      }
+   }
+
+   /* Previous to GLSL version 1.30, different compilation units could mix and
+    * match shading language versions.  With GLSL 1.30 and later, the versions
+    * of all shaders must match.
+    */
+   assert(min_version >= 110);
+   assert(max_version <= 130);
+   if ((max_version >= 130) && (min_version != max_version)) {
+      linker_error_printf(prog, "all shaders must use same shading "
+                          "language version\n");
+      goto done;
+   }
+
+   prog->Version = max_version;
+
+   /* Link all shaders for a particular stage and validate the result.
+    */
+   prog->_NumLinkedShaders = 0;
+   if (num_vert_shaders > 0) {
+      gl_shader *const sh =
+         link_intrastage_shaders(prog, vert_shader_list, num_vert_shaders);
+
+      if (sh == NULL)
+         goto done;
+
+      if (!validate_vertex_shader_executable(prog, sh))
+         goto done;
+
+      prog->_LinkedShaders[prog->_NumLinkedShaders] = sh;
+      prog->_NumLinkedShaders++;
+   }
+
+   if (num_frag_shaders > 0) {
+      gl_shader *const sh =
+         link_intrastage_shaders(prog, frag_shader_list, num_frag_shaders);
+
+      if (sh == NULL)
+         goto done;
+
+      if (!validate_fragment_shader_executable(prog, sh))
+         goto done;
+
+      prog->_LinkedShaders[prog->_NumLinkedShaders] = sh;
+      prog->_NumLinkedShaders++;
+   }
+
+   /* Here begins the inter-stage linking phase.  Some initial validation is
+    * performed, then locations are assigned for uniforms, attributes, and
+    * varyings.
+    */
+   if (cross_validate_uniforms(prog)) {
+      /* Validate the inputs of each stage with the output of the preceding
+       * stage.
+       */
+      for (unsigned i = 1; i < prog->_NumLinkedShaders; i++) {
+         if (!cross_validate_outputs_to_inputs(prog,
+                                               prog->_LinkedShaders[i - 1],
+                                               prog->_LinkedShaders[i]))
+            goto done;
+      }
+
+      prog->LinkStatus = true;
+   }
+
+   /* FINISHME: Perform whole-program optimization here. */
+   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
+      /* Optimization passes */
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+
+      /* Lowering */
+      do_mat_op_to_vec(ir);
+      do_mod_to_fract(ir);
+      do_div_to_mul_rcp(ir);
+
+      /* Repeat the passes until none of them changes anything. */
+      do {
+         progress = false;
+
+         progress = do_function_inlining(ir) || progress;
+         progress = do_if_simplification(ir) || progress;
+         progress = do_copy_propagation(ir) || progress;
+         progress = do_dead_code_local(ir) || progress;
+#if 0
+         progress = do_dead_code_unlinked(state, ir) || progress;
+#endif
+         progress = do_constant_variable_unlinked(ir) || progress;
+         progress = do_constant_folding(ir) || progress;
+         progress = do_if_return(ir) || progress;
+#if 0
+         if (ctx->Shader.EmitNoIfs)
+            progress = do_if_to_cond_assign(ir) || progress;
+#endif
+
+         progress = do_vec_index_to_swizzle(ir) || progress;
+         /* Do this one after the previous to let the easier pass handle
+          * constant vector indexing.
+          */
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+
+         progress = do_swizzle_swizzle(ir) || progress;
+      } while (progress);
+   }
+
+   assign_uniform_locations(prog);
+
+   /* Only look at the first linked stage when there is one; with no linked
+    * shaders there is no valid entry in _LinkedShaders[0] to inspect.
+    */
+   if ((prog->_NumLinkedShaders > 0)
+       && (prog->_LinkedShaders[0]->Type == GL_VERTEX_SHADER)) {
+      /* FINISHME: The value of the max_attribute_index parameter is
+       * FINISHME: implementation dependent based on the value of
+       * FINISHME: GL_MAX_VERTEX_ATTRIBS.  GL_MAX_VERTEX_ATTRIBS must be
+       * FINISHME: at least 16, so hardcode 16 for now.
+       */
+      if (!assign_attribute_locations(prog, 16)) {
+         /* LinkStatus may already have been set to true by the
+          * cross-validation step above; a failed attribute assignment is a
+          * link failure, so take it back.
+          */
+         prog->LinkStatus = false;
+         goto done;
+      }
+   }
+
+   for (unsigned i = 1; i < prog->_NumLinkedShaders; i++)
+      assign_varying_locations(prog->_LinkedShaders[i - 1],
+                               prog->_LinkedShaders[i]);
+
+   /* FINISHME: Assign fragment shader output locations. */
+
+done:
+   /* frag_shader_list points into the same allocation, so one free suffices. */
+   free(vert_shader_list);
+}
--- /dev/null
- #include "shader/prog_parameter.h"
- #include "shader/prog_uniform.h"
+/*
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <GL/gl.h>
+#include "main/mtypes.h"
+
+extern "C" {
++#include "program/prog_parameter.h"
++#include "program/prog_uniform.h"
+}
+
+/**
+ * Link the shaders attached to \c prog.
+ *
+ * The outcome is reported through \c prog->LinkStatus, with messages in
+ * \c prog->InfoLog.
+ */
+extern void
+link_shaders(struct gl_shader_program *prog);
--- /dev/null
- (void) _slang_compile(ctx, sh);
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2004-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009-2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file shaderapi.c
+ * \author Brian Paul
+ *
+ * Implementation of GLSL-related API functions.
+ * The glUniform* functions are in uniforms.c
+ *
+ *
+ * XXX things to do:
+ * 1. Check that the right error code is generated for all _mesa_error() calls.
+ * 2. Insert FLUSH_VERTICES calls in various places
+ */
+
+
+ #include "main/glheader.h"
+ #include "main/context.h"
+ #include "main/dispatch.h"
+ #include "main/enums.h"
+ #include "main/hash.h"
+ #include "main/shaderapi.h"
+ #include "main/shaderobj.h"
+ #include "program/program.h"
+ #include "program/prog_parameter.h"
+ #include "program/prog_uniform.h"
+ #include "slang/slang_compile.h"
+ #include "slang/slang_link.h"
++#include "talloc.h"
+
+
+ /** Define this to enable shader substitution (see below) */
+ #define SHADER_SUBST 0
+
+
+ /**
+  * Translate the MESA_GLSL environment variable into a mask of GLSL_x flags.
+  * Every recognised keyword found anywhere in the value switches on one
+  * flag; an unset variable yields an empty mask.
+  */
+ static GLbitfield
+ get_shader_flags(void)
+ {
+    const char *const setting = _mesa_getenv("MESA_GLSL");
+    GLbitfield mask = 0x0;
+
+    if (!setting)
+       return mask;
+
+    if (strstr(setting, "dump"))
+       mask |= GLSL_DUMP;
+
+    if (strstr(setting, "log"))
+       mask |= GLSL_LOG;
+
+    if (strstr(setting, "nopvert"))
+       mask |= GLSL_NOP_VERT;
+
+    if (strstr(setting, "nopfrag"))
+       mask |= GLSL_NOP_FRAG;
+
+    /* "nopt" contains "opt", so the longer keyword is tested first and the
+     * two are mutually exclusive.
+     */
+    if (strstr(setting, "nopt"))
+       mask |= GLSL_NO_OPT;
+    else if (strstr(setting, "opt"))
+       mask |= GLSL_OPT;
+
+    if (strstr(setting, "uniform"))
+       mask |= GLSL_UNIFORMS;
+
+    if (strstr(setting, "useprog"))
+       mask |= GLSL_USE_PROG;
+
+    return mask;
+ }
+
+
+ /**
+ * Initialize context's shader state.
+ *
+ * Sets the code-generation defaults, the debug flags and the default
+ * pragma settings in \c ctx->Shader.
+ *
+ * \param ctx the context whose shader state is initialized
+ */
+ void
+ _mesa_init_shader_state(GLcontext *ctx)
+ {
+ /* Device drivers may override these to control what kind of instructions
+ * are generated by the GLSL compiler.
+ */
+ ctx->Shader.EmitHighLevelInstructions = GL_TRUE;
+ ctx->Shader.EmitContReturn = GL_TRUE;
+ ctx->Shader.EmitCondCodes = GL_FALSE;
+ ctx->Shader.EmitComments = GL_FALSE;
++ ctx->Shader.EmitNoIfs = GL_FALSE;
+ /* Flag mask derived from the MESA_GLSL environment variable. */
+ ctx->Shader.Flags = get_shader_flags();
+
+ /* Default pragma settings */
+ ctx->Shader.DefaultPragmas.IgnoreOptimize = GL_FALSE;
+ ctx->Shader.DefaultPragmas.IgnoreDebug = GL_FALSE;
+ ctx->Shader.DefaultPragmas.Optimize = GL_TRUE;
+ ctx->Shader.DefaultPragmas.Debug = GL_FALSE;
+ }
+
+
+ /**
+ * Free the per-context shader-related state.
+ *
+ * \param ctx the context being torn down
+ */
+ void
+ _mesa_free_shader_state(GLcontext *ctx)
+ {
+ /* Point CurrentProgram at NULL, which presumably drops the context's
+ * reference to the program - confirm against
+ * _mesa_reference_shader_program.
+ */
+ _mesa_reference_shader_program(ctx, &ctx->Shader.CurrentProgram, NULL);
+ }
+
+
+ /**
+  * Return the size of the given GLSL datatype, in floats (components).
+  *
+  * Matrices are counted as whole float[4] vectors per column group, so for
+  * example every two-column matrix type reports 8.
+  *
+  * \param type  GL enum naming a GLSL scalar, vector, matrix or sampler type
+  * \return      number of float components; 1 (after reporting a problem)
+  *              for an unrecognised type
+  */
+ GLint
+ _mesa_sizeof_glsl_type(GLenum type)
+ {
+    switch (type) {
+    case GL_FLOAT:
+    case GL_INT:
+    /* The unsigned vector types are handled below; the scalar belongs with
+     * the other one-component types rather than in the error path.
+     */
+    case GL_UNSIGNED_INT:
+    case GL_BOOL:
+    case GL_SAMPLER_1D:
+    case GL_SAMPLER_2D:
+    case GL_SAMPLER_3D:
+    case GL_SAMPLER_CUBE:
+    case GL_SAMPLER_1D_SHADOW:
+    case GL_SAMPLER_2D_SHADOW:
+    case GL_SAMPLER_2D_RECT_ARB:
+    case GL_SAMPLER_2D_RECT_SHADOW_ARB:
+    case GL_SAMPLER_1D_ARRAY_EXT:
+    case GL_SAMPLER_2D_ARRAY_EXT:
+    case GL_SAMPLER_1D_ARRAY_SHADOW_EXT:
+    case GL_SAMPLER_2D_ARRAY_SHADOW_EXT:
+    case GL_SAMPLER_CUBE_SHADOW_EXT:
+       return 1;
+    case GL_FLOAT_VEC2:
+    case GL_INT_VEC2:
+    case GL_UNSIGNED_INT_VEC2:
+    case GL_BOOL_VEC2:
+       return 2;
+    case GL_FLOAT_VEC3:
+    case GL_INT_VEC3:
+    case GL_UNSIGNED_INT_VEC3:
+    case GL_BOOL_VEC3:
+       return 3;
+    case GL_FLOAT_VEC4:
+    case GL_INT_VEC4:
+    case GL_UNSIGNED_INT_VEC4:
+    case GL_BOOL_VEC4:
+       return 4;
+    case GL_FLOAT_MAT2:
+    case GL_FLOAT_MAT2x3:
+    case GL_FLOAT_MAT2x4:
+       return 8; /* two float[4] vectors */
+    case GL_FLOAT_MAT3:
+    case GL_FLOAT_MAT3x2:
+    case GL_FLOAT_MAT3x4:
+       return 12; /* three float[4] vectors */
+    case GL_FLOAT_MAT4:
+    case GL_FLOAT_MAT4x2:
+    case GL_FLOAT_MAT4x3:
+       return 16;  /* four float[4] vectors */
+    default:
+       _mesa_problem(NULL, "Invalid type in _mesa_sizeof_glsl_type()");
+       return 1;
+    }
+ }
+
+
+ /**
+  * Copy a zero-terminated string into a caller-supplied buffer, truncating
+  * it to fit.
+  * \param dst  the string destination
+  * \param maxLength  size of <dst>; at most maxLength - 1 chars are copied
+  * \param length  if non-NULL, returns number of chars copied (excluding the
+  *                terminating zero)
+  * \param src  the string source; NULL is treated as an empty string
+  */
+ void
+ _mesa_copy_string(GLchar *dst, GLsizei maxLength,
+                   GLsizei *length, const GLchar *src)
+ {
+    GLsizei copied = 0;
+
+    if (src) {
+       while (copied < maxLength - 1 && src[copied]) {
+          dst[copied] = src[copied];
+          copied++;
+       }
+    }
+
+    /* Terminate only when the buffer has room for at least one char. */
+    if (maxLength > 0)
+       dst[copied] = 0;
+
+    if (length)
+       *length = copied;
+ }
+
+
+
+ /**
+  * Return the length of the longest varying name that was registered with
+  * glTransformFeedbackVaryings(), or 0 when there are none.
+  */
+ static GLint
+ longest_feedback_varying_name(const struct gl_shader_program *shProg)
+ {
+    GLint longest = 0;
+    GLuint idx = 0;
+
+    while (idx < shProg->TransformFeedback.NumVarying) {
+       const GLint cur = strlen(shProg->TransformFeedback.VaryingNames[idx]);
+
+       if (longest < cur)
+          longest = cur;
+
+       idx++;
+    }
+
+    return longest;
+ }
+
+
+
+ /**
+  * Tell whether \p name refers to an existing shader program object.
+  */
+ static GLboolean
+ is_program(GLcontext *ctx, GLuint name)
+ {
+    if (!_mesa_lookup_shader_program(ctx, name))
+       return GL_FALSE;
+
+    return GL_TRUE;
+ }
+
+
+ /**
+  * Tell whether \p name refers to an existing shader object.
+  */
+ static GLboolean
+ is_shader(GLcontext *ctx, GLuint name)
+ {
+    if (!_mesa_lookup_shader(ctx, name))
+       return GL_FALSE;
+
+    return GL_TRUE;
+ }
+
+
+ /**
+ * Attach shader to a shader program.
+ *
+ * \param program name of the shader program to attach to
+ * \param shader name of the shader object to attach
+ *
+ * Raises GL_INVALID_OPERATION if the shader is already attached and
+ * GL_OUT_OF_MEMORY if the shader list cannot be grown.
+ */
+ static void
+ attach_shader(GLcontext *ctx, GLuint program, GLuint shader)
+ {
+ struct gl_shader_program *shProg;
+ struct gl_shader *sh;
+ GLuint i, n;
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, "glAttachShader");
+ if (!shProg)
+ return;
+
+ sh = _mesa_lookup_shader_err(ctx, shader, "glAttachShader");
+ if (!sh) {
+ return;
+ }
+
+ n = shProg->NumShaders;
+ for (i = 0; i < n; i++) {
+ if (shProg->Shaders[i] == sh) {
+ /* The shader is already attached to this program. The
+ * GL_ARB_shader_objects spec says:
+ *
+ * "The error INVALID_OPERATION is generated by AttachObjectARB
+ * if <obj> is already attached to <containerObj>."
+ */
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glAttachShader");
+ return;
+ }
+ }
+
+ /* grow list */
+ /* NOTE(review): the result is stored straight into shProg->Shaders, so on
+ * failure the previous pointer value is lost while NumShaders keeps its
+ * old value - confirm what _mesa_realloc does with the old buffer.
+ */
+ shProg->Shaders = (struct gl_shader **)
+ _mesa_realloc(shProg->Shaders,
+ n * sizeof(struct gl_shader *),
+ (n + 1) * sizeof(struct gl_shader *));
+ if (!shProg->Shaders) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glAttachShader");
+ return;
+ }
+
+ /* append */
+ shProg->Shaders[n] = NULL; /* since realloc() didn't zero the new space */
+ _mesa_reference_shader(ctx, &shProg->Shaders[n], sh);
+ shProg->NumShaders++;
+ }
+
+
+ /**
+  * Look up the location of the vertex attribute called \p name in a linked
+  * program.
+  *
+  * \return the attribute's location, or -1 when the program is unknown or
+  *         not linked (GL_INVALID_OPERATION is raised in the latter case),
+  *         \p name is NULL, or no such attribute exists.
+  */
+ static GLint
+ get_attrib_location(GLcontext *ctx, GLuint program, const GLchar *name)
+ {
+    struct gl_shader_program *const prog_obj
+       = _mesa_lookup_shader_program_err(ctx, program, "glGetAttribLocation");
+    const struct gl_program_parameter_list *attr_list;
+    GLint slot;
+
+    if (!prog_obj)
+       return -1;
+
+    if (!prog_obj->LinkStatus) {
+       _mesa_error(ctx, GL_INVALID_OPERATION,
+                   "glGetAttribLocation(program not linked)");
+       return -1;
+    }
+
+    if (!name || !prog_obj->VertexProgram)
+       return -1;
+
+    /* Attribute bindings live on the linked vertex program. */
+    attr_list = prog_obj->VertexProgram->Base.Attributes;
+    if (!attr_list)
+       return -1;
+
+    slot = _mesa_lookup_parameter_index(attr_list, -1, name);
+    if (slot < 0)
+       return -1;
+
+    return attr_list->Parameters[slot].StateIndexes[0];
+ }
+
+
+ /**
+  * Record a user-requested binding of a vertex attribute name to a generic
+  * attribute index.
+  *
+  * \param program  name of the shader program
+  * \param index    generic attribute index to bind; must be below
+  *                 ctx->Const.VertexProgram.MaxAttribs
+  * \param name     attribute name; names starting with "gl_" are rejected
+  *
+  * Raises GL_INVALID_OPERATION for a reserved name, GL_INVALID_VALUE for an
+  * out-of-range index and GL_OUT_OF_MEMORY if the binding cannot be stored.
+  */
+ static void
+ bind_attrib_location(GLcontext *ctx, GLuint program, GLuint index,
+                      const GLchar *name)
+ {
+    struct gl_shader_program *shProg;
+    const GLint size = -1; /* unknown size */
+    GLint i;
+    GLenum datatype = GL_FLOAT_VEC4;
+
+    shProg = _mesa_lookup_shader_program_err(ctx, program,
+                                             "glBindAttribLocation");
+    if (!shProg) {
+       return;
+    }
+
+    if (!name)
+       return;
+
+    if (strncmp(name, "gl_", 3) == 0) {
+       _mesa_error(ctx, GL_INVALID_OPERATION,
+                   "glBindAttribLocation(illegal name)");
+       return;
+    }
+
+    if (index >= ctx->Const.VertexProgram.MaxAttribs) {
+       _mesa_error(ctx, GL_INVALID_VALUE, "glBindAttribLocation(index)");
+       return;
+    }
+
+    /* The attribute's current location used to be looked up here, but the
+     * value was never read, so the lookup has been dropped.
+     */
+
+    /* this will replace the current value if it's already in the list */
+    i = _mesa_add_attribute(shProg->Attributes, name, size, datatype, index);
+    if (i < 0) {
+       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindAttribLocation");
+       return;
+    }
+
+    /*
+     * Note that this attribute binding won't go into effect until
+     * glLinkProgram is called again.
+     */
+ }
+
+
+ /**
+  * Create a shader object of the given type through the driver and register
+  * it in the shared ShaderObjects table.
+  *
+  * \return the new shader's name, or 0 (with GL_INVALID_ENUM raised) for an
+  *         unsupported type
+  */
+ static GLuint
+ create_shader(GLcontext *ctx, GLenum type)
+ {
+    const GLuint id = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1);
+    struct gl_shader *new_shader;
+
+    if (type != GL_FRAGMENT_SHADER &&
+        type != GL_VERTEX_SHADER &&
+        type != GL_GEOMETRY_SHADER_ARB) {
+       _mesa_error(ctx, GL_INVALID_ENUM, "CreateShader(type)");
+       return 0;
+    }
+
+    new_shader = ctx->Driver.NewShader(ctx, id, type);
+    _mesa_HashInsert(ctx->Shared->ShaderObjects, id, new_shader);
+
+    return id;
+ }
+
+
+ /**
+ * Create a shader program object through the driver and register it in the
+ * shared ShaderObjects table.
+ *
+ * \return the name of the new program object
+ */
+ static GLuint
+ create_shader_program(GLcontext *ctx)
+ {
+ GLuint name;
+ struct gl_shader_program *shProg;
+
+ name = _mesa_HashFindFreeKeyBlock(ctx->Shared->ShaderObjects, 1);
+
+ /* NOTE(review): the driver's result is used below without a NULL check. */
+ shProg = ctx->Driver.NewShaderProgram(ctx, name);
+
+ _mesa_HashInsert(ctx->Shared->ShaderObjects, name, shProg);
+
+ assert(shProg->RefCount == 1);
+
+ return name;
+ }
+
+
+ /**
+  * Flag a shader program for deletion and give up one reference to it.
+  *
+  * Named w/ "2" to indicate OpenGL 2.x vs GL_ARB_fragment_programs's
+  * DeleteProgramARB.
+  */
+ static void
+ delete_shader_program(GLcontext *ctx, GLuint name)
+ {
+    /*
+     * NOTE: shaders/programs are deleted a little differently from texture
+     * objects (and buffer objects, etc).  A shader/program handle/ID stays
+     * in the hash table until the object is really deleted (refcount==0).
+     * For texture objects the handle/ID leaves the hash table in
+     * glDeleteTextures() while the object itself may linger until its
+     * refcount reaches zero.
+     */
+    struct gl_shader_program *target =
+       _mesa_lookup_shader_program_err(ctx, name, "glDeleteProgram");
+
+    if (target) {
+       target->DeletePending = GL_TRUE;
+
+       /* effectively, decr the program's refcount */
+       _mesa_reference_shader_program(ctx, &target, NULL);
+    }
+ }
+
+
+ /**
+  * Flag a shader object for deletion and give up one reference to it.
+  */
+ static void
+ delete_shader(GLcontext *ctx, GLuint shader)
+ {
+    struct gl_shader *target =
+       _mesa_lookup_shader_err(ctx, shader, "glDeleteShader");
+
+    if (target) {
+       target->DeletePending = GL_TRUE;
+
+       /* effectively, decr the shader's refcount */
+       _mesa_reference_shader(ctx, &target, NULL);
+    }
+ }
+
+
+ /**
+  * Detach a shader from a shader program.
+  *
+  * \param program  name of the shader program
+  * \param shader   name of the shader to remove from the program
+  *
+  * Raises GL_OUT_OF_MEMORY if the shrunken list cannot be allocated (the
+  * program is left unchanged in that case), GL_INVALID_OPERATION if
+  * \p shader names an existing object that is not attached, and
+  * GL_INVALID_VALUE if it names nothing at all.
+  */
+ static void
+ detach_shader(GLcontext *ctx, GLuint program, GLuint shader)
+ {
+    struct gl_shader_program *shProg;
+    GLuint n;
+    GLuint i, j;
+
+    shProg = _mesa_lookup_shader_program_err(ctx, program, "glDetachShader");
+    if (!shProg)
+       return;
+
+    n = shProg->NumShaders;
+
+    for (i = 0; i < n; i++) {
+       if (shProg->Shaders[i]->Name == shader) {
+          /* found it */
+          struct gl_shader **newList;
+
+          /* Allocate the new, smaller array before touching any state so
+           * that a failure leaves the program exactly as it was.  At least
+           * one slot is always requested: malloc(0) may legitimately return
+           * NULL, which must not be mistaken for out-of-memory when the
+           * last shader is detached.
+           */
+          newList = (struct gl_shader **)
+             malloc(((n > 1) ? (n - 1) : 1) * sizeof(struct gl_shader *));
+          if (!newList) {
+             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDetachShader");
+             return;
+          }
+
+          /* release */
+          _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL);
+
+          /* copy everything except entry i */
+          for (j = 0; j < i; j++) {
+             newList[j] = shProg->Shaders[j];
+          }
+          while (++i < n)
+             newList[j++] = shProg->Shaders[i];
+          free(shProg->Shaders);
+
+          shProg->Shaders = newList;
+          shProg->NumShaders = n - 1;
+
+ #ifdef DEBUG
+          /* sanity check */
+          {
+             for (j = 0; j < shProg->NumShaders; j++) {
+                assert(shProg->Shaders[j]->Type == GL_VERTEX_SHADER ||
+                       shProg->Shaders[j]->Type == GL_FRAGMENT_SHADER);
+                assert(shProg->Shaders[j]->RefCount > 0);
+             }
+          }
+ #endif
+
+          return;
+       }
+    }
+
+    /* not found */
+    {
+       GLenum err;
+       if (is_shader(ctx, shader))
+          err = GL_INVALID_OPERATION;
+       else if (is_program(ctx, shader))
+          err = GL_INVALID_OPERATION;
+       else
+          err = GL_INVALID_VALUE;
+       _mesa_error(ctx, err, "glDetachShader(shader)");
+       return;
+    }
+ }
+
+
+ /**
+  * Report the name, size and type of one attribute of a program's vertex
+  * program.
+  *
+  * \param index      which attribute to describe
+  * \param maxLength  size of the \p nameOut buffer
+  * \param length     if non-NULL, receives the number of chars written
+  * \param size       if non-NULL, receives the attribute's size in units of
+  *                   its data type
+  * \param type       if non-NULL, receives the attribute's data type
+  * \param nameOut    receives the attribute's name
+  *
+  * Raises GL_INVALID_VALUE when there is no attribute list or \p index is
+  * out of range.
+  */
+ static void
+ get_active_attrib(GLcontext *ctx, GLuint program, GLuint index,
+                   GLsizei maxLength, GLsizei *length, GLint *size,
+                   GLenum *type, GLchar *nameOut)
+ {
+    struct gl_shader_program *const prog_obj =
+       _mesa_lookup_shader_program_err(ctx, program, "glGetActiveAttrib");
+    const struct gl_program_parameter_list *attr_list;
+
+    if (!prog_obj)
+       return;
+
+    attr_list = prog_obj->VertexProgram
+       ? prog_obj->VertexProgram->Base.Attributes
+       : NULL;
+
+    if (!attr_list || index >= attr_list->NumParameters) {
+       _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveAttrib(index)");
+       return;
+    }
+
+    _mesa_copy_string(nameOut, maxLength, length,
+                      attr_list->Parameters[index].Name);
+
+    if (size) {
+       /* Stored size is in components; convert to a count of elements. */
+       *size = attr_list->Parameters[index].Size
+          / _mesa_sizeof_glsl_type(attr_list->Parameters[index].DataType);
+    }
+
+    if (type) {
+       *type = attr_list->Parameters[index].DataType;
+    }
+ }
+
+
+ /**
+  * Write the names of the shaders attached to a program into \p obj.
+  *
+  * At most \p maxCount names are written; the number actually written is
+  * stored in \p count when it is non-NULL.
+  */
+ static void
+ get_attached_shaders(GLcontext *ctx, GLuint program, GLsizei maxCount,
+                      GLsizei *count, GLuint *obj)
+ {
+    struct gl_shader_program *const prog_obj =
+       _mesa_lookup_shader_program_err(ctx, program, "glGetAttachedShaders");
+    GLuint written;
+
+    if (!prog_obj)
+       return;
+
+    written = 0;
+    while (written < (GLuint) maxCount && written < prog_obj->NumShaders) {
+       obj[written] = prog_obj->Shaders[written]->Name;
+       written++;
+    }
+
+    if (count)
+       *count = written;
+ }
+
+
+ /**
+  * glGetHandleARB() - return ID/name of currently bound shader program.
+  *
+  * Only GL_PROGRAM_OBJECT_ARB is accepted; any other \p pname raises
+  * GL_INVALID_ENUM.  Returns 0 when no program is current.
+  */
+ static GLuint
+ get_handle(GLcontext *ctx, GLenum pname)
+ {
+    if (pname != GL_PROGRAM_OBJECT_ARB) {
+       _mesa_error(ctx, GL_INVALID_ENUM, "glGetHandleARB");
+       return 0;
+    }
+
+    return ctx->Shader.CurrentProgram
+       ? ctx->Shader.CurrentProgram->Name
+       : 0;
+ }
+
+
+ /**
+ * glGetProgramiv() - get shader program state.
+ * Note that this is for GLSL shader programs, not ARB vertex/fragment
+ * programs (see glGetProgramivARB).
+ *
+ * \param program name of the shader program to query
+ * \param pname which property to return
+ * \param params receives the single integer result; it is not written when
+ * an error is raised
+ *
+ * Raises GL_INVALID_VALUE for an unknown program and GL_INVALID_ENUM for an
+ * unrecognised \p pname.
+ */
+ static void
+ get_programiv(GLcontext *ctx, GLuint program, GLenum pname, GLint *params)
+ {
+ const struct gl_program_parameter_list *attribs;
+ struct gl_shader_program *shProg
+ = _mesa_lookup_shader_program(ctx, program);
+
+ if (!shProg) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramiv(program)");
+ return;
+ }
+
+ /* Attribute queries are answered from the vertex program, if any. */
+ if (shProg->VertexProgram)
+ attribs = shProg->VertexProgram->Base.Attributes;
+ else
+ attribs = NULL;
+
+ switch (pname) {
+ case GL_DELETE_STATUS:
+ *params = shProg->DeletePending;
+ break;
+ case GL_LINK_STATUS:
+ *params = shProg->LinkStatus;
+ break;
+ case GL_VALIDATE_STATUS:
+ *params = shProg->Validated;
+ break;
+ case GL_INFO_LOG_LENGTH:
+ /* length includes the terminating zero; 0 when there is no log */
+ *params = shProg->InfoLog ? strlen(shProg->InfoLog) + 1 : 0;
+ break;
+ case GL_ATTACHED_SHADERS:
+ *params = shProg->NumShaders;
+ break;
+ case GL_ACTIVE_ATTRIBUTES:
+ *params = attribs ? attribs->NumParameters : 0;
+ break;
+ case GL_ACTIVE_ATTRIBUTE_MAX_LENGTH:
+ /* NOTE(review): attribs may be NULL here - confirm that
+ * _mesa_longest_parameter_name accepts a NULL list.
+ */
+ *params = _mesa_longest_parameter_name(attribs, PROGRAM_INPUT) + 1;
+ break;
+ case GL_ACTIVE_UNIFORMS:
+ *params = shProg->Uniforms ? shProg->Uniforms->NumUniforms : 0;
+ break;
+ case GL_ACTIVE_UNIFORM_MAX_LENGTH:
+ *params = _mesa_longest_uniform_name(shProg->Uniforms);
+ if (*params > 0)
+ (*params)++; /* add one for terminating zero */
+ break;
+ case GL_PROGRAM_BINARY_LENGTH_OES:
+ *params = 0;
+ break;
+ #if FEATURE_EXT_transform_feedback
+ case GL_TRANSFORM_FEEDBACK_VARYINGS:
+ *params = shProg->TransformFeedback.NumVarying;
+ break;
+ case GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH:
+ *params = longest_feedback_varying_name(shProg) + 1;
+ break;
+ case GL_TRANSFORM_FEEDBACK_BUFFER_MODE:
+ *params = shProg->TransformFeedback.BufferMode;
+ break;
+ #endif
+ #if FEATURE_ARB_geometry_shader4
+ case GL_GEOMETRY_VERTICES_OUT_ARB:
+ *params = shProg->Geom.VerticesOut;
+ break;
+ case GL_GEOMETRY_INPUT_TYPE_ARB:
+ *params = shProg->Geom.InputType;
+ break;
+ case GL_GEOMETRY_OUTPUT_TYPE_ARB:
+ *params = shProg->Geom.OutputType;
+ break;
+ #endif
+ default:
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramiv(pname)");
+ return;
+ }
+ }
+
+
+ /**
+ * glGetShaderiv() - query one integer-valued property of a GLSL shader.
+ *
+ * \param ctx     current GL context
+ * \param name    shader object name; the lookup helper records the error
+ *                itself when the name cannot be resolved
+ * \param pname   property to query
+ * \param params  receives the result; untouched when an error is recorded
+ */
+ static void
+ get_shaderiv(GLcontext *ctx, GLuint name, GLenum pname, GLint *params)
+ {
+    struct gl_shader *sh = _mesa_lookup_shader_err(ctx, name, "glGetShaderiv");
+    GLint result;
+
+    if (sh == NULL)
+       return;
+
+    if (pname == GL_SHADER_TYPE) {
+       result = sh->Type;
+    }
+    else if (pname == GL_DELETE_STATUS) {
+       result = sh->DeletePending;
+    }
+    else if (pname == GL_COMPILE_STATUS) {
+       result = sh->CompileStatus;
+    }
+    else if (pname == GL_INFO_LOG_LENGTH) {
+       /* length counts the terminating zero; no log means zero */
+       if (sh->InfoLog)
+          result = strlen(sh->InfoLog) + 1;
+       else
+          result = 0;
+    }
+    else if (pname == GL_SHADER_SOURCE_LENGTH) {
+       /* likewise counts the terminating zero */
+       if (sh->Source)
+          result = strlen((char *) sh->Source) + 1;
+       else
+          result = 0;
+    }
+    else {
+       _mesa_error(ctx, GL_INVALID_ENUM, "glGetShaderiv(pname)");
+       return;
+    }
+
+    *params = result;
+ }
+
+
+ /**
+ * Copy a shader program's info log into a caller-supplied buffer.
+ *
+ * Records GL_INVALID_VALUE and returns without touching the outputs when
+ * \p program does not resolve to a shader program.  Otherwise the copy is
+ * delegated to _mesa_copy_string().
+ * NOTE(review): presumably _mesa_copy_string() honours bufSize and reports
+ * the copied length through \p length - helper not visible here, confirm.
+ */
+ static void
+ get_program_info_log(GLcontext *ctx, GLuint program, GLsizei bufSize,
+ GLsizei *length, GLchar *infoLog)
+ {
+ struct gl_shader_program *shProg
+ = _mesa_lookup_shader_program(ctx, program);
+ if (!shProg) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramInfoLog(program)");
+ return;
+ }
+ _mesa_copy_string(infoLog, bufSize, length, shProg->InfoLog);
+ }
+
+
+ /**
+ * Copy a shader's info log into a caller-supplied buffer.
+ *
+ * Records GL_INVALID_VALUE and returns without touching the outputs when
+ * \p shader does not resolve to a shader object.  Otherwise the copy is
+ * delegated to _mesa_copy_string().
+ */
+ static void
+ get_shader_info_log(GLcontext *ctx, GLuint shader, GLsizei bufSize,
+ GLsizei *length, GLchar *infoLog)
+ {
+ struct gl_shader *sh = _mesa_lookup_shader(ctx, shader);
+ if (!sh) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glGetShaderInfoLog(shader)");
+ return;
+ }
+ _mesa_copy_string(infoLog, bufSize, length, sh->InfoLog);
+ }
+
+
+ /**
+ * Return shader source code.
+ *
+ * Looks the shader up with _mesa_lookup_shader_err() (which records the GL
+ * error itself on failure) and hands the stored source string to
+ * _mesa_copy_string() together with the caller's buffer, its size and the
+ * optional length output.
+ */
+ static void
+ get_shader_source(GLcontext *ctx, GLuint shader, GLsizei maxLength,
+ GLsizei *length, GLchar *sourceOut)
+ {
+ struct gl_shader *sh;
+ sh = _mesa_lookup_shader_err(ctx, shader, "glGetShaderSource");
+ if (!sh) {
+ return;
+ }
+ _mesa_copy_string(sourceOut, maxLength, length, sh->Source);
+ }
+
+
+ /**
+ * Set/replace shader source code.
+ *
+ * Ownership of \p source passes to this function: on success the pointer is
+ * stored in the shader and later released with free(), so it must have been
+ * allocated with malloc().  If the shader cannot be found the string is
+ * freed here, because the caller has already given it up and it would
+ * otherwise leak.
+ *
+ * \param ctx     current GL context
+ * \param shader  name of the shader object to update
+ * \param source  malloc'ed, zero-terminated source string (ownership taken)
+ */
+ static void
+ shader_source(GLcontext *ctx, GLuint shader, const GLchar *source)
+ {
+    struct gl_shader *sh;
+
+    sh = _mesa_lookup_shader_err(ctx, shader, "glShaderSource");
+    if (!sh) {
+       /* error already recorded; drop the string we were handed */
+       free((void *) source);
+       return;
+    }
+
+    /* free old shader source string and install new one */
+    if (sh->Source) {
+       free((void *) sh->Source);
+    }
+    sh->Source = source;
+    /* new source invalidates any earlier compile result */
+    sh->CompileStatus = GL_FALSE;
+ #ifdef DEBUG
+    sh->SourceChecksum = _mesa_str_checksum(sh->Source);
+ #endif
+ }
+
+
+ /**
+ * Compile a shader.
+ *
+ * Resolves \p shaderObj with _mesa_lookup_shader_err() (which records the
+ * GL error on failure), resets the shader's pragma state to the context
+ * defaults and then invokes the GLSL compiler on it.
+ */
+ static void
+ compile_shader(GLcontext *ctx, GLuint shaderObj)
+ {
+ struct gl_shader *sh;
+
+ sh = _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader");
+ if (!sh)
+ return;
+
+ /* set default pragma state for shader */
+ sh->Pragmas = ctx->Shader.DefaultPragmas;
+
+ /* this call will set the sh->CompileStatus field to indicate if
+ * compilation was successful.
+ */
- _slang_link(ctx, program, shProg);
++ _mesa_glsl_compile_shader(ctx, sh);
+ }
+
+
+ /**
+ * Link a program's shaders.
+ *
+ * Records GL_INVALID_OPERATION and does nothing when transform feedback is
+ * active and \p program is the currently bound shader program.  Otherwise
+ * flushes pending vertices and hands the program to the GLSL linker.
+ *
+ * \param ctx      current GL context
+ * \param program  name of the shader program object to link
+ */
+ static void
+ link_program(GLcontext *ctx, GLuint program)
+ {
+    struct gl_shader_program *shProg;
+    struct gl_transform_feedback_object *obj =
+       ctx->TransformFeedback.CurrentObject;
+
+    shProg = _mesa_lookup_shader_program_err(ctx, program, "glLinkProgram");
+    if (!shProg)
+       return;
+
+    if (obj->Active && shProg == ctx->Shader.CurrentProgram) {
+       /* message previously lacked its closing parenthesis */
+       _mesa_error(ctx, GL_INVALID_OPERATION,
+                   "glLinkProgram(transform feedback active)");
+       return;
+    }
+
+    FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+
- free(shProg->InfoLog);
++ _mesa_glsl_link_shader(ctx, shProg);
+
+    /* debug code */
+    if (0) {
+       GLuint i;
+
+       printf("Link %u shaders in program %u: %s\n",
+              shProg->NumShaders, shProg->Name,
+              shProg->LinkStatus ? "Success" : "Failed");
+
+       for (i = 0; i < shProg->NumShaders; i++) {
+          printf(" shader %u, type 0x%x\n",
+                 shProg->Shaders[i]->Name,
+                 shProg->Shaders[i]->Type);
+       }
+    }
+ }
+
+
+ /**
+ * Debug helper: dump a one-line summary of the program being bound,
+ * followed by one line per attached shader (stage, name, source checksum)
+ * and the ids of the linked vertex/fragment programs when present.
+ */
+ static void
+ print_shader_info(const struct gl_shader_program *shProg)
+ {
+    GLuint idx;
+
+    printf("Mesa: glUseProgram(%u)\n", shProg->Name);
+
+    for (idx = 0; idx < shProg->NumShaders; idx++) {
+       const struct gl_shader *cur = shProg->Shaders[idx];
+       const char *stage;
+
+       /* unknown shader types print an empty stage name */
+       if (cur->Type == GL_VERTEX_SHADER)
+          stage = "vertex";
+       else if (cur->Type == GL_FRAGMENT_SHADER)
+          stage = "fragment";
+       else if (cur->Type == GL_GEOMETRY_SHADER)
+          stage = "geometry";
+       else
+          stage = "";
+
+       printf(" %s shader %u, checksum %u\n", stage,
+              cur->Name, cur->SourceChecksum);
+    }
+
+    if (shProg->VertexProgram)
+       printf(" vert prog %u\n", shProg->VertexProgram->Base.Id);
+    if (shProg->FragmentProgram)
+       printf(" frag prog %u\n", shProg->FragmentProgram->Base.Id);
+ }
+
+
+ /**
+ * Make the named shader program current for subsequent rendering.
+ *
+ * A \p program of zero unbinds the current program.  GL_INVALID_OPERATION
+ * is recorded when transform feedback is active or when the program has not
+ * been linked; lookup failures are reported by the lookup helper.  Asking
+ * for the program that is already current is a no-op.
+ */
+ void
+ _mesa_use_program(GLcontext *ctx, GLuint program)
+ {
+    struct gl_shader_program *newProg = NULL;
+    const struct gl_transform_feedback_object *xfb =
+       ctx->TransformFeedback.CurrentObject;
+
+    if (xfb->Active) {
+       _mesa_error(ctx, GL_INVALID_OPERATION,
+                   "glUseProgram(transform feedback active)");
+       return;
+    }
+
+    /* already bound: nothing to do */
+    if (ctx->Shader.CurrentProgram &&
+        ctx->Shader.CurrentProgram->Name == program)
+       return;
+
+    if (program != 0) {
+       newProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram");
+       if (newProg == NULL)
+          return;
+
+       if (!newProg->LinkStatus) {
+          _mesa_error(ctx, GL_INVALID_OPERATION,
+                      "glUseProgram(program %u not linked)", program);
+          return;
+       }
+
+       /* optional debug dump, enabled through the shader flags */
+       if (ctx->Shader.Flags & GLSL_USE_PROG)
+          print_shader_info(newProg);
+    }
+
+    if (newProg != ctx->Shader.CurrentProgram) {
+       FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+       _mesa_reference_shader_program(ctx, &ctx->Shader.CurrentProgram, newProg);
+    }
+
+    /* let the driver know, if it cares */
+    if (ctx->Driver.UseProgram)
+       ctx->Driver.UseProgram(ctx, newProg);
+ }
+
+
+ /**
+ * Check a program's samplers for conflicting use of a texture unit.
+ * Two samplers of different texture targets must not refer to the same
+ * texture unit.
+ *
+ * \param errMsg  receives a description of the conflict (at most 100 bytes
+ *                are written); untouched when the program is valid
+ * \return GL_TRUE if valid, GL_FALSE if invalid
+ */
+ static GLboolean
+ validate_samplers(GLcontext *ctx, const struct gl_program *prog, char *errMsg)
+ {
+    static const char *targetName[] = {
+       "TEXTURE_2D_ARRAY",
+       "TEXTURE_1D_ARRAY",
+       "TEXTURE_CUBE",
+       "TEXTURE_3D",
+       "TEXTURE_RECT",
+       "TEXTURE_2D",
+       "TEXTURE_1D",
+    };
+    GLint unitTarget[MAX_TEXTURE_IMAGE_UNITS];
+    GLbitfield pending;
+    GLuint u;
+
+    assert(Elements(targetName) == NUM_TEXTURE_TARGETS);
+
+    if (prog->SamplersUsed == 0x0)
+       return GL_TRUE;
+
+    /* -1 marks a texture unit no sampler has claimed yet */
+    for (u = 0; u < Elements(unitTarget); u++)
+       unitTarget[u] = -1;
+
+    /* visit every set bit of the sampler mask, lowest first */
+    for (pending = prog->SamplersUsed; pending; ) {
+       const GLint sampler = _mesa_ffs(pending) - 1;
+       GLuint unit;
+       gl_texture_index target;
+
+       assert(sampler >= 0);
+       assert(sampler < MAX_TEXTURE_IMAGE_UNITS);
+
+       unit = prog->SamplerUnits[sampler];
+       target = prog->SamplerTargets[sampler];
+
+       if (unitTarget[unit] != -1 && unitTarget[unit] != target) {
+          _mesa_snprintf(errMsg, 100,
+                         "Texture unit %d is accessed both as %s and %s",
+                         unit, targetName[unitTarget[unit]], targetName[target]);
+          return GL_FALSE;
+       }
+
+       unitTarget[unit] = target;
+       /* the bit is known to be set, so clearing it equals toggling it */
+       pending &= ~(1 << sampler);
+    }
+
+    return GL_TRUE;
+ }
+
+
+ /**
+ * Do validation of the given shader program.
+ * \param errMsg returns error message if validation fails.
+ * \return GL_TRUE if valid, GL_FALSE if invalid (and set errMsg)
+ *
+ * Note: the early return for an unlinked program below does NOT write
+ * errMsg; only the sampler checks do.
+ */
+ static GLboolean
+ validate_shader_program(GLcontext *ctx,
+ const struct gl_shader_program *shProg,
+ char *errMsg)
+ {
+ const struct gl_vertex_program *vp = shProg->VertexProgram;
+ const struct gl_fragment_program *fp = shProg->FragmentProgram;
+
+ /* an unlinked program is never valid (errMsg is left untouched here) */
+ if (!shProg->LinkStatus) {
+ return GL_FALSE;
+ }
+
+ /* From the GL spec, a program is invalid if any of these are true:
+
+ any two active samplers in the current program object are of
+ different types, but refer to the same texture image unit,
+
+ any active sampler in the current program object refers to a texture
+ image unit where fixed-function fragment processing accesses a
+ texture target that does not match the sampler type, or
+
+ the sum of the number of active samplers in the program and the
+ number of texture image units enabled for fixed-function fragment
+ processing exceeds the combined limit on the total number of texture
+ image units allowed.
+ */
+
+
+ /*
+ * Check: any two active samplers in the current program object are of
+ * different types, but refer to the same texture image unit,
+ *
+ * Only this first condition is checked here, once per linked stage.
+ */
+ if (vp && !validate_samplers(ctx, &vp->Base, errMsg)) {
+ return GL_FALSE;
+ }
+ if (fp && !validate_samplers(ctx, &fp->Base, errMsg)) {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+ }
+
+
+ /**
+ * Called via glValidateProgram()
+ *
+ * Runs validate_shader_program() and stores the outcome in
+ * shProg->Validated.  On failure the program's info log is replaced with
+ * the validation message.
+ *
+ * \param ctx      current GL context
+ * \param program  name of the shader program object to validate
+ */
+ static void
+ validate_program(GLcontext *ctx, GLuint program)
+ {
+    struct gl_shader_program *shProg;
+    /* Start with an empty message: validate_shader_program() can fail
+     * without writing errMsg (an unlinked program), and copying an
+     * uninitialized buffer into the info log would read stack garbage.
+     */
+    char errMsg[100] = "";
+
+    shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram");
+    if (!shProg) {
+       return;
+    }
+
+    shProg->Validated = validate_shader_program(ctx, shProg, errMsg);
+    if (!shProg->Validated) {
+       /* update info log */
+       if (shProg->InfoLog) {
- shProg->InfoLog = _mesa_strdup(errMsg);
++ talloc_free(shProg->InfoLog);
+       }
++ shProg->InfoLog = talloc_strdup(shProg, errMsg);
+    }
+ }
+
+
+
+ /**
+ * AttachObjectARB entry point (installed by _mesa_init_shader_dispatch()):
+ * forwards to attach_shader() with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_AttachObjectARB(GLhandleARB program, GLhandleARB shader)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ attach_shader(ctx, program, shader);
+ }
+
+
+ /**
+ * AttachShader entry point: same implementation as _mesa_AttachObjectARB(),
+ * forwarding to attach_shader() with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_AttachShader(GLuint program, GLuint shader)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ attach_shader(ctx, program, shader);
+ }
+
+
+ /**
+ * BindAttribLocationARB entry point: forwards to bind_attrib_location()
+ * with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_BindAttribLocationARB(GLhandleARB program, GLuint index,
+ const GLcharARB *name)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ bind_attrib_location(ctx, program, index, name);
+ }
+
+
+ /**
+ * CompileShaderARB entry point: forwards to compile_shader() with the
+ * current context.
+ */
+ void GLAPIENTRY
+ _mesa_CompileShaderARB(GLhandleARB shaderObj)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ compile_shader(ctx, shaderObj);
+ }
+
+
+ /**
+ * CreateShader entry point: returns whatever create_shader() returns for
+ * the given shader type.
+ */
+ GLuint GLAPIENTRY
+ _mesa_CreateShader(GLenum type)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return create_shader(ctx, type);
+ }
+
+
+ /**
+ * CreateShaderObjectARB entry point: same implementation as
+ * _mesa_CreateShader(), with the result returned as a GLhandleARB.
+ */
+ GLhandleARB GLAPIENTRY
+ _mesa_CreateShaderObjectARB(GLenum type)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return create_shader(ctx, type);
+ }
+
+
+ /**
+ * CreateProgram entry point: returns whatever create_shader_program()
+ * returns.
+ */
+ GLuint GLAPIENTRY
+ _mesa_CreateProgram(void)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return create_shader_program(ctx);
+ }
+
+
+ /**
+ * CreateProgramObjectARB entry point: same implementation as
+ * _mesa_CreateProgram(), with the result returned as a GLhandleARB.
+ */
+ GLhandleARB GLAPIENTRY
+ _mesa_CreateProgramObjectARB(void)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return create_shader_program(ctx);
+ }
+
+
+ /**
+ * DeleteObjectARB entry point.  The handle may name either a program or a
+ * shader, so the object kind is probed first and the matching delete
+ * routine is called.  A zero handle is ignored, and a handle that is
+ * neither kind is currently ignored as well (no GL error is recorded).
+ */
+ void GLAPIENTRY
+ _mesa_DeleteObjectARB(GLhandleARB obj)
+ {
+ if (obj) {
+ GET_CURRENT_CONTEXT(ctx);
+ if (is_program(ctx, obj)) {
+ delete_shader_program(ctx, obj);
+ }
+ else if (is_shader(ctx, obj)) {
+ delete_shader(ctx, obj);
+ }
+ else {
+ /* error? */
+ }
+ }
+ }
+
+
+ /**
+ * DeleteProgram entry point: forwards to delete_shader_program().
+ * A zero name is ignored without looking up the context.
+ */
+ void GLAPIENTRY
+ _mesa_DeleteProgram(GLuint name)
+ {
+ if (name) {
+ GET_CURRENT_CONTEXT(ctx);
+ delete_shader_program(ctx, name);
+ }
+ }
+
+
+ /**
+ * DeleteShader entry point: forwards to delete_shader().
+ * A zero name is ignored without looking up the context.
+ */
+ void GLAPIENTRY
+ _mesa_DeleteShader(GLuint name)
+ {
+ if (name) {
+ GET_CURRENT_CONTEXT(ctx);
+ delete_shader(ctx, name);
+ }
+ }
+
+
+ /**
+ * DetachObjectARB entry point: forwards to detach_shader() with the
+ * current context.
+ */
+ void GLAPIENTRY
+ _mesa_DetachObjectARB(GLhandleARB program, GLhandleARB shader)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ detach_shader(ctx, program, shader);
+ }
+
+
+ /**
+ * DetachShader entry point: same implementation as _mesa_DetachObjectARB(),
+ * forwarding to detach_shader() with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_DetachShader(GLuint program, GLuint shader)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ detach_shader(ctx, program, shader);
+ }
+
+
+ /**
+ * GetActiveAttribARB entry point: passes all arguments unchanged to
+ * get_active_attrib() together with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_GetActiveAttribARB(GLhandleARB program, GLuint index,
+ GLsizei maxLength, GLsizei * length, GLint * size,
+ GLenum * type, GLcharARB * name)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_active_attrib(ctx, program, index, maxLength, length, size, type, name);
+ }
+
+
+ /**
+ * GetAttachedObjectsARB entry point: forwards to get_attached_shaders()
+ * with the current context.
+ */
+ void GLAPIENTRY
+ _mesa_GetAttachedObjectsARB(GLhandleARB container, GLsizei maxCount,
+ GLsizei * count, GLhandleARB * obj)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_attached_shaders(ctx, container, maxCount, count, obj);
+ }
+
+
+ /**
+ * GetAttachedShaders entry point: same implementation as
+ * _mesa_GetAttachedObjectsARB(), forwarding to get_attached_shaders().
+ */
+ void GLAPIENTRY
+ _mesa_GetAttachedShaders(GLuint program, GLsizei maxCount,
+ GLsizei *count, GLuint *obj)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_attached_shaders(ctx, program, maxCount, count, obj);
+ }
+
+
+ /**
+ * GetAttribLocationARB entry point: returns the result of
+ * get_attrib_location() for the given program and attribute name.
+ */
+ GLint GLAPIENTRY
+ _mesa_GetAttribLocationARB(GLhandleARB program, const GLcharARB * name)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return get_attrib_location(ctx, program, name);
+ }
+
+
+ /**
+ * GetInfoLogARB entry point.  The handle may name a program or a shader;
+ * the matching info-log getter is selected by probing the object kind.
+ * GL_INVALID_OPERATION is recorded when the handle is neither.
+ */
+ void GLAPIENTRY
+ _mesa_GetInfoLogARB(GLhandleARB object, GLsizei maxLength, GLsizei * length,
+                     GLcharARB * infoLog)
+ {
+    GET_CURRENT_CONTEXT(ctx);
+
+    if (is_program(ctx, object)) {
+       get_program_info_log(ctx, object, maxLength, length, infoLog);
+       return;
+    }
+
+    if (is_shader(ctx, object)) {
+       get_shader_info_log(ctx, object, maxLength, length, infoLog);
+       return;
+    }
+
+    _mesa_error(ctx, GL_INVALID_OPERATION, "glGetInfoLogARB");
+ }
+
+
+ /**
+ * GetObjectParameterivARB entry point, implemented on top of
+ * get_programiv() and get_shaderiv().
+ *
+ * GL_OBJECT_TYPE_ARB is answered here directly according to the kind of
+ * object; every other \p pname is passed to the getter for that kind.
+ * GL_INVALID_VALUE is recorded when the handle is neither a program nor a
+ * shader.
+ */
+ void GLAPIENTRY
+ _mesa_GetObjectParameterivARB(GLhandleARB object, GLenum pname, GLint *params)
+ {
+    GET_CURRENT_CONTEXT(ctx);
+    const GLboolean wantType = (pname == GL_OBJECT_TYPE_ARB);
+
+    if (is_program(ctx, object)) {
+       if (wantType)
+          *params = GL_PROGRAM_OBJECT_ARB;
+       else
+          get_programiv(ctx, object, pname, params);
+       return;
+    }
+
+    if (is_shader(ctx, object)) {
+       if (wantType)
+          *params = GL_SHADER_OBJECT_ARB;
+       else
+          get_shaderiv(ctx, object, pname, params);
+       return;
+    }
+
+    _mesa_error(ctx, GL_INVALID_VALUE, "glGetObjectParameterivARB");
+ }
+
+
+ /**
+ * GetObjectParameterfvARB entry point: float flavour of
+ * _mesa_GetObjectParameterivARB().  The integer query is performed and its
+ * single result converted to GLfloat.
+ *
+ * The scratch value is zero-initialized because the integer query leaves
+ * it unwritten when it records an error; without that, params[0] would be
+ * set from an uninitialized stack slot.
+ */
+ void GLAPIENTRY
+ _mesa_GetObjectParameterfvARB(GLhandleARB object, GLenum pname,
+                               GLfloat *params)
+ {
+    GLint iparams[1] = {0};  /* XXX is one element enough? */
+    _mesa_GetObjectParameterivARB(object, pname, iparams);
+    params[0] = (GLfloat) iparams[0];
+ }
+
+
+ /**
+ * GetProgramiv entry point: forwards to get_programiv() with the current
+ * context.
+ */
+ void GLAPIENTRY
+ _mesa_GetProgramiv(GLuint program, GLenum pname, GLint *params)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_programiv(ctx, program, pname, params);
+ }
+
+
+ /**
+ * GetShaderiv entry point: forwards to get_shaderiv() with the current
+ * context.
+ */
+ void GLAPIENTRY
+ _mesa_GetShaderiv(GLuint shader, GLenum pname, GLint *params)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_shaderiv(ctx, shader, pname, params);
+ }
+
+
+ /**
+ * GetProgramInfoLog entry point: forwards to get_program_info_log() with
+ * the current context.
+ */
+ void GLAPIENTRY
+ _mesa_GetProgramInfoLog(GLuint program, GLsizei bufSize,
+ GLsizei *length, GLchar *infoLog)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_program_info_log(ctx, program, bufSize, length, infoLog);
+ }
+
+
+ /**
+ * GetShaderInfoLog entry point: forwards to get_shader_info_log() with the
+ * current context.
+ */
+ void GLAPIENTRY
+ _mesa_GetShaderInfoLog(GLuint shader, GLsizei bufSize,
+ GLsizei *length, GLchar *infoLog)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_shader_info_log(ctx, shader, bufSize, length, infoLog);
+ }
+
+
+ /**
+ * GetShaderSourceARB entry point: forwards to get_shader_source() with the
+ * current context.
+ */
+ void GLAPIENTRY
+ _mesa_GetShaderSourceARB(GLhandleARB shader, GLsizei maxLength,
+ GLsizei *length, GLcharARB *sourceOut)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ get_shader_source(ctx, shader, maxLength, length, sourceOut);
+ }
+
+
+ /**
+ * GetHandleARB entry point: returns the result of get_handle() for
+ * \p pname.
+ */
+ GLhandleARB GLAPIENTRY
+ _mesa_GetHandleARB(GLenum pname)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return get_handle(ctx, pname);
+ }
+
+
+ /**
+ * IsProgram entry point: returns the result of is_program() for \p name.
+ */
+ GLboolean GLAPIENTRY
+ _mesa_IsProgram(GLuint name)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return is_program(ctx, name);
+ }
+
+
+ /**
+ * IsShader entry point: returns the result of is_shader() for \p name.
+ */
+ GLboolean GLAPIENTRY
+ _mesa_IsShader(GLuint name)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ return is_shader(ctx, name);
+ }
+
+
+ /**
+ * LinkProgramARB entry point: forwards to link_program() with the current
+ * context.
+ */
+ void GLAPIENTRY
+ _mesa_LinkProgramARB(GLhandleARB programObj)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ link_program(ctx, programObj);
+ }
+
+
+
+ /**
+ * Read shader source code from a file.
+ * Useful for debugging to override an app's shader.
+ *
+ * At most 50*1000 - 1 bytes are read; anything beyond that is silently
+ * dropped.
+ *
+ * \param fname  path of the file to read
+ * \return newly allocated, zero-terminated copy of the file contents
+ *         (as produced by _mesa_strdup()), or NULL if the file cannot be
+ *         opened or the scratch buffer cannot be allocated
+ */
+ static GLcharARB *
+ read_shader(const char *fname)
+ {
+    const int max = 50*1000;
+    FILE *f = fopen(fname, "r");
+    GLcharARB *buffer, *shader;
+    int len;
+
+    if (!f) {
+       return NULL;
+    }
+
+    buffer = (char *) malloc(max);
+    if (!buffer) {
+       fclose(f);
+       return NULL;
+    }
+
+    /* Leave room for the terminator: reading the full 'max' bytes would
+     * make the store below land one byte past the end of the buffer.
+     */
+    len = fread(buffer, 1, max - 1, f);
+    buffer[len] = 0;
+
+    fclose(f);
+
+    /* hand back a right-sized copy and drop the scratch buffer */
+    shader = _mesa_strdup(buffer);
+    free(buffer);
+
+    return shader;
+ }
+
+
+ /**
+ * Called via glShaderSource() and glShaderSourceARB() API functions.
+ * Basically, concatenate the source code strings into one long string
+ * and pass it to shader_source().
+ *
+ * \param shaderObj  shader object whose source is being set
+ * \param count      number of entries in \p string (and \p length)
+ * \param string     array of source fragments
+ * \param length     optional array of fragment lengths; a NULL array or a
+ *                   negative entry means the fragment is zero-terminated
+ *
+ * Records GL_INVALID_VALUE for a zero handle, a NULL \p string or a
+ * negative \p count, GL_INVALID_OPERATION for a NULL fragment and
+ * GL_OUT_OF_MEMORY when an allocation fails.  A \p count of zero is a
+ * silent no-op.
+ */
+ void GLAPIENTRY
+ _mesa_ShaderSourceARB(GLhandleARB shaderObj, GLsizei count,
+                       const GLcharARB ** string, const GLint * length)
+ {
+    GET_CURRENT_CONTEXT(ctx);
+    GLint *offsets;
+    GLsizei i, totalLength;
+    GLcharARB *source;
+    GLuint checksum = 0;
+
+    if (!shaderObj || string == NULL || count < 0) {
+       _mesa_error(ctx, GL_INVALID_VALUE, "glShaderSourceARB");
+       return;
+    }
+
+    /* Nothing to concatenate.  Without this check offsets[count - 1] below
+     * would index before the start of the array.
+     */
+    if (count == 0)
+       return;
+
+    /*
+     * This array holds offsets of where the appropriate string ends, thus the
+     * last element will be set to the total length of the source code.
+     */
+    offsets = (GLint *) malloc(count * sizeof(GLint));
+    if (offsets == NULL) {
+       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glShaderSourceARB");
+       return;
+    }
+
+    for (i = 0; i < count; i++) {
+       if (string[i] == NULL) {
+          free((GLvoid *) offsets);
+          _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderSourceARB(null string)");
+          return;
+       }
+       if (length == NULL || length[i] < 0)
+          offsets[i] = strlen(string[i]);
+       else
+          offsets[i] = length[i];
+       /* accumulate string lengths */
+       if (i > 0)
+          offsets[i] += offsets[i - 1];
+    }
+
+    /* Total length of source string is sum of all strings plus two.
+     * One extra byte for terminating zero, another extra byte to silence
+     * valgrind warnings in the parser/grammar code.
+     */
+    totalLength = offsets[count - 1] + 2;
+    source = (GLcharARB *) malloc(totalLength * sizeof(GLcharARB));
+    if (source == NULL) {
+       free((GLvoid *) offsets);
+       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glShaderSourceARB");
+       return;
+    }
+
+    /* copy each fragment to its slot; fragment i starts where i-1 ended */
+    for (i = 0; i < count; i++) {
+       GLint start = (i > 0) ? offsets[i - 1] : 0;
+       memcpy(source + start, string[i],
+              (offsets[i] - start) * sizeof(GLcharARB));
+    }
+    source[totalLength - 1] = '\0';
+    source[totalLength - 2] = '\0';
+
+    if (SHADER_SUBST) {
+       /* Compute the shader's source code checksum then try to open a file
+        * named newshader_<CHECKSUM>. If it exists, use it in place of the
+        * original shader source code. For debugging.
+        */
+       char filename[100];
+       GLcharARB *newSource;
+
+       checksum = _mesa_str_checksum(source);
+
+       _mesa_snprintf(filename, sizeof(filename), "newshader_%d", checksum);
+
+       newSource = read_shader(filename);
+       if (newSource) {
+          fprintf(stderr, "Mesa: Replacing shader %u chksum=%d with %s\n",
+                  shaderObj, checksum, filename);
+          free(source);
+          source = newSource;
+       }
+    }
+
+    /* shader_source() stores the pointer in the shader object */
+    shader_source(ctx, shaderObj, source);
+
+    if (SHADER_SUBST) {
+       struct gl_shader *sh = _mesa_lookup_shader(ctx, shaderObj);
+       if (sh)
+          sh->SourceChecksum = checksum; /* save original checksum */
+    }
+
+    free(offsets);
+ }
+
+
+ /**
+ * UseProgramObjectARB entry point: flushes pending vertices with
+ * _NEW_PROGRAM and then forwards to _mesa_use_program().
+ */
+ void GLAPIENTRY
+ _mesa_UseProgramObjectARB(GLhandleARB program)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+ _mesa_use_program(ctx, program);
+ }
+
+
+ /**
+ * ValidateProgramARB entry point: forwards to validate_program() with the
+ * current context.
+ */
+ void GLAPIENTRY
+ _mesa_ValidateProgramARB(GLhandleARB program)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ validate_program(ctx, program);
+ }
+
+ #ifdef FEATURE_ES2
+
+ /**
+ * Stub: unconditionally records GL_INVALID_OPERATION (using the function
+ * name as the message).  \p range and \p precision are never written.
+ */
+ void GLAPIENTRY
+ _mesa_GetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype,
+ GLint* range, GLint* precision)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+ }
+
+
+ /**
+ * Stub: unconditionally records GL_INVALID_OPERATION (using the function
+ * name as the message) and does nothing else.
+ */
+ void GLAPIENTRY
+ _mesa_ReleaseShaderCompiler(void)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+ }
+
+
+ /**
+ * Stub: unconditionally records GL_INVALID_OPERATION (using the function
+ * name as the message).  None of the arguments are inspected.
+ */
+ void GLAPIENTRY
+ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat,
+ const void* binary, GLint length)
+ {
+ GET_CURRENT_CONTEXT(ctx);
+ _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+ }
+
+ #endif /* FEATURE_ES2 */
+
+
+ #if FEATURE_ARB_geometry_shader4
+
+ /**
+ * ProgramParameteriARB entry point: set one of the geometry shader
+ * parameters stored on a shader program.
+ *
+ * \param program  name of the shader program object
+ * \param pname    GL_GEOMETRY_VERTICES_OUT_ARB, GL_GEOMETRY_INPUT_TYPE_ARB
+ *                 or GL_GEOMETRY_OUTPUT_TYPE_ARB
+ * \param value    new value; validated per \p pname below
+ *
+ * Records GL_INVALID_VALUE for an out-of-range or unsupported \p value and
+ * GL_INVALID_ENUM for an unknown \p pname; the program is left unchanged in
+ * both cases.
+ */
+ void GLAPIENTRY
+ _mesa_ProgramParameteriARB(GLuint program, GLenum pname,
+                            GLint value)
+ {
+    struct gl_shader_program *shProg;
+    GET_CURRENT_CONTEXT(ctx);
+
+    ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+    shProg = _mesa_lookup_shader_program_err(ctx, program,
+                                             "glProgramParameteri");
+    if (!shProg)
+       return;
+
+    switch (pname) {
+    case GL_GEOMETRY_VERTICES_OUT_ARB:
+       /* must be at least 1 and no more than the context's limit */
+       if (value < 1 ||
+           value > ctx->Const.GeometryProgram.MaxGeometryOutputVertices) {
+          _mesa_error(ctx, GL_INVALID_VALUE,
+                      "glProgramParameteri(GL_GEOMETRY_VERTICES_OUT_ARB=%d)",
+                      value);
+          return;
+       }
+       shProg->Geom.VerticesOut = value;
+       break;
+    case GL_GEOMETRY_INPUT_TYPE_ARB:
+       switch (value) {
+       case GL_POINTS:
+       case GL_LINES:
+       case GL_LINES_ADJACENCY_ARB:
+       case GL_TRIANGLES:
+       case GL_TRIANGLES_ADJACENCY_ARB:
+          shProg->Geom.InputType = value;
+          break;
+       default:
+          _mesa_error(ctx, GL_INVALID_VALUE,
+                      "glProgramParameteri(geometry input type = %s)",
+                      _mesa_lookup_enum_by_nr(value));
+          return;
+       }
+       break;
+    case GL_GEOMETRY_OUTPUT_TYPE_ARB:
+       switch (value) {
+       case GL_POINTS:
+       case GL_LINE_STRIP:
+       case GL_TRIANGLE_STRIP:
+          shProg->Geom.OutputType = value;
+          break;
+       default:
+          _mesa_error(ctx, GL_INVALID_VALUE,
+                      "glProgramParameteri(geometry output type = %s)",
+                      _mesa_lookup_enum_by_nr(value));
+          return;
+       }
+       break;
+    default:
+       _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteriARB(pname=%s)",
+                   _mesa_lookup_enum_by_nr(pname));
+       break;
+    }
+ }
+
+ #endif
+
+
+ /**
+ * Plug in shader-related functions into API dispatch table.
+ *
+ * \param exec  dispatch table to fill in
+ *
+ * Everything is installed only when FEATURE_GL is enabled; the vertex
+ * attribute and geometry shader entries are additionally guarded by their
+ * own feature macros.
+ */
+ void
+ _mesa_init_shader_dispatch(struct _glapi_table *exec)
+ {
+ #if FEATURE_GL
+ /* GL_ARB_vertex/fragment_shader */
+ SET_DeleteObjectARB(exec, _mesa_DeleteObjectARB);
+ SET_GetHandleARB(exec, _mesa_GetHandleARB);
+ SET_DetachObjectARB(exec, _mesa_DetachObjectARB);
+ SET_CreateShaderObjectARB(exec, _mesa_CreateShaderObjectARB);
+ SET_ShaderSourceARB(exec, _mesa_ShaderSourceARB);
+ SET_CompileShaderARB(exec, _mesa_CompileShaderARB);
+ SET_CreateProgramObjectARB(exec, _mesa_CreateProgramObjectARB);
+ SET_AttachObjectARB(exec, _mesa_AttachObjectARB);
+ SET_LinkProgramARB(exec, _mesa_LinkProgramARB);
+ SET_UseProgramObjectARB(exec, _mesa_UseProgramObjectARB);
+ SET_ValidateProgramARB(exec, _mesa_ValidateProgramARB);
+ SET_GetObjectParameterfvARB(exec, _mesa_GetObjectParameterfvARB);
+ SET_GetObjectParameterivARB(exec, _mesa_GetObjectParameterivARB);
+ SET_GetInfoLogARB(exec, _mesa_GetInfoLogARB);
+ SET_GetAttachedObjectsARB(exec, _mesa_GetAttachedObjectsARB);
+ SET_GetShaderSourceARB(exec, _mesa_GetShaderSourceARB);
+
+ /* OpenGL 2.0 */
+ SET_AttachShader(exec, _mesa_AttachShader);
+ SET_CreateProgram(exec, _mesa_CreateProgram);
+ SET_CreateShader(exec, _mesa_CreateShader);
+ SET_DeleteProgram(exec, _mesa_DeleteProgram);
+ SET_DeleteShader(exec, _mesa_DeleteShader);
+ SET_DetachShader(exec, _mesa_DetachShader);
+ SET_GetAttachedShaders(exec, _mesa_GetAttachedShaders);
+ SET_GetProgramiv(exec, _mesa_GetProgramiv);
+ SET_GetProgramInfoLog(exec, _mesa_GetProgramInfoLog);
+ SET_GetShaderiv(exec, _mesa_GetShaderiv);
+ SET_GetShaderInfoLog(exec, _mesa_GetShaderInfoLog);
+ SET_IsProgram(exec, _mesa_IsProgram);
+ SET_IsShader(exec, _mesa_IsShader);
+
+ /* vertex attribute queries/bindings */
+ #if FEATURE_ARB_vertex_shader
+ SET_BindAttribLocationARB(exec, _mesa_BindAttribLocationARB);
+ SET_GetActiveAttribARB(exec, _mesa_GetActiveAttribARB);
+ SET_GetAttribLocationARB(exec, _mesa_GetAttribLocationARB);
+ #endif
+
+ /* geometry shader program parameters */
+ #if FEATURE_ARB_geometry_shader4
+ SET_ProgramParameteriARB(exec, _mesa_ProgramParameteriARB);
+ #endif
+ #endif /* FEATURE_GL */
+ }
+
--- /dev/null
-
+ /*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2004-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009-2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file shaderobj.c
+ * \author Brian Paul
+ *
+ */
+
+
+ #include "main/glheader.h"
+ #include "main/context.h"
+ #include "main/hash.h"
+ #include "main/shaderobj.h"
+ #include "program/program.h"
+ #include "program/prog_parameter.h"
+ #include "program/prog_uniform.h"
-static struct gl_shader *
++#include "talloc.h"
+
+ /**********************************************************************/
+ /*** Shader object functions ***/
+ /**********************************************************************/
+
+
+ /**
+ * Re-point a shader reference.
+ *
+ * Drops the reference currently held in *ptr (deleting that shader through
+ * the driver hook and removing it from the shared hash once its refcount
+ * reaches zero), then stores \p sh in *ptr and bumps its refcount.
+ * Passing a NULL \p sh simply releases the old reference.  Nothing happens
+ * when *ptr already equals \p sh.
+ */
+ void
+ _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr,
+                        struct gl_shader *sh)
+ {
+    struct gl_shader *prev;
+
+    assert(ptr);
+
+    prev = *ptr;
+    if (prev == sh)
+       return;   /* already pointing at the requested shader */
+
+    if (prev != NULL) {
+       /* release the reference we were holding */
+       ASSERT(prev->RefCount > 0);
+       prev->RefCount--;
+       /*printf("SHADER DECR %p (%d) to %d\n",
+         (void*) prev, prev->Name, prev->RefCount);*/
+
+       if (prev->RefCount == 0) {
+          _mesa_HashRemove(ctx->Shared->ShaderObjects, prev->Name);
+          ctx->Driver.DeleteShader(ctx, prev);
+       }
+
+       *ptr = NULL;
+    }
+    assert(!*ptr);
+
+    if (sh != NULL) {
+       /* take a reference on the new shader */
+       sh->RefCount++;
+       /*printf("SHADER INCR %p (%d) to %d\n",
+         (void*) sh, sh->Name, sh->RefCount);*/
+       *ptr = sh;
+    }
+ }
+
+
+ /**
+ * Allocate a new gl_shader object, initialize it.
+ * Called via ctx->Driver.NewShader()
+ *
+ * \param name  object name stored in the new shader
+ * \param type  shader type; asserted to be fragment, vertex or geometry
+ * \return the new shader with RefCount 1, or NULL if allocation failed
+ */
- shader = CALLOC_STRUCT(gl_shader);
++struct gl_shader *
+ _mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type)
+ {
+ struct gl_shader *shader;
+ assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+ type == GL_GEOMETRY_SHADER_ARB);
- if (sh->InfoLog)
- free(sh->InfoLog);
+ /* zeroed talloc allocation with no parent context */
++ shader = talloc_zero(NULL, struct gl_shader);
+ if (shader) {
+ shader->Type = type;
+ shader->Name = name;
+ /* the caller receives the initial reference */
+ shader->RefCount = 1;
+ }
+ return shader;
+ }
+
+
+ /**
+ * Delete a shader object.
+ * Called via ctx->Driver.DeleteShader().
+ *
+ * Releases the source string with free(), drops the reference held in
+ * sh->Program and finally frees the shader itself with talloc_free().
+ */
+ static void
+ __mesa_delete_shader(GLcontext *ctx, struct gl_shader *sh)
+ {
+ /* the source string is released with free(), not talloc */
+ if (sh->Source)
+ free((void *) sh->Source);
- free(sh);
+ _mesa_reference_program(ctx, &sh->Program, NULL);
- shProg = CALLOC_STRUCT(gl_shader_program);
++ talloc_free(sh);
+ }
+
+
+ /**
+ * Find the GLSL shader object with the given name.
+ *
+ * \return the shader, or NULL when \p name is zero, is unknown, or names a
+ *         shader program instead of a shader.  No GL error is recorded.
+ */
+ struct gl_shader *
+ _mesa_lookup_shader(GLcontext *ctx, GLuint name)
+ {
+    struct gl_shader *found;
+
+    if (name == 0)
+       return NULL;
+
+    found = (struct gl_shader *)
+       _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+
+    /* Shaders and shader programs share one hash table, so a hit may be
+     * the wrong kind of object; the Type field tells them apart.
+     */
+    if (found != NULL && found->Type == GL_SHADER_PROGRAM_MESA)
+       return NULL;
+
+    return found;
+ }
+
+
+ /**
+ * Like _mesa_lookup_shader(), but record an error if shader is not found.
+ *
+ * \param caller  text used as the error message, normally the name of the
+ *                GL entry point
+ * \return the shader, or NULL after recording GL_INVALID_VALUE (zero or
+ *         unknown name) or GL_INVALID_OPERATION (name refers to a shader
+ *         program rather than a shader)
+ */
+ struct gl_shader *
+ _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller)
+ {
+    /* 'caller' is always passed as an argument to a "%s" format, never as
+     * the format string itself, so a '%' in it cannot be misinterpreted.
+     */
+    if (!name) {
+       _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+       return NULL;
+    }
+    else {
+       struct gl_shader *sh = (struct gl_shader *)
+          _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+       if (!sh) {
+          _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+          return NULL;
+       }
+       /* programs live in the same hash table; reject them here */
+       if (sh->Type == GL_SHADER_PROGRAM_MESA) {
+          _mesa_error(ctx, GL_INVALID_OPERATION, "%s", caller);
+          return NULL;
+       }
+       return sh;
+    }
+ }
+
+
+
+ /**********************************************************************/
+ /*** Shader Program object functions ***/
+ /**********************************************************************/
+
+
+ /**
+ * Re-point a shader program reference.
+ *
+ * Drops the reference currently held in *ptr (deleting that program through
+ * the driver hook and removing it from the shared hash once its refcount
+ * reaches zero), then stores \p shProg in *ptr and bumps its refcount.
+ * Passing a NULL \p shProg simply releases the old reference.  Nothing
+ * happens when *ptr already equals \p shProg.
+ */
+ void
+ _mesa_reference_shader_program(GLcontext *ctx,
+                                struct gl_shader_program **ptr,
+                                struct gl_shader_program *shProg)
+ {
+    struct gl_shader_program *prev;
+
+    assert(ptr);
+
+    prev = *ptr;
+    if (prev == shProg)
+       return;   /* already pointing at the requested program */
+
+    if (prev != NULL) {
+       /* release the reference we were holding */
+       ASSERT(prev->RefCount > 0);
+       prev->RefCount--;
+ #if 0
+       printf("ShaderProgram %p ID=%u RefCount-- to %d\n",
+              (void *) prev, prev->Name, prev->RefCount);
+ #endif
+
+       if (prev->RefCount == 0) {
+          _mesa_HashRemove(ctx->Shared->ShaderObjects, prev->Name);
+          ctx->Driver.DeleteShaderProgram(ctx, prev);
+       }
+
+       *ptr = NULL;
+    }
+    assert(!*ptr);
+
+    if (shProg != NULL) {
+       /* take a reference on the new program */
+       shProg->RefCount++;
+ #if 0
+       printf("ShaderProgram %p ID=%u RefCount++ to %d\n",
+              (void *) shProg, shProg->Name, shProg->RefCount);
+ #endif
+       *ptr = shProg;
+    }
+ }
+
+
+ /**
+ * Allocate a new gl_shader_program object, initialize it.
+ * Called via ctx->Driver.NewShaderProgram()
+ *
+ * \param name  object name stored in the new program
+ * \return the new program with RefCount 1, or NULL if allocation failed
+ */
+ static struct gl_shader_program *
+ _mesa_new_shader_program(GLcontext *ctx, GLuint name)
+ {
+ struct gl_shader_program *shProg;
- free(shProg->InfoLog);
+ /* zeroed talloc allocation with no parent context */
++ shProg = talloc_zero(NULL, struct gl_shader_program);
+ if (shProg) {
+ /* the Type tag is what the lookup functions use to tell programs
+ * from shaders in the shared hash table
+ */
+ shProg->Type = GL_SHADER_PROGRAM_MESA;
+ shProg->Name = name;
+ shProg->RefCount = 1;
+ /* NOTE(review): the result of _mesa_new_parameter_list() is not
+ * checked for NULL here.
+ */
+ shProg->Attributes = _mesa_new_parameter_list();
+ #if FEATURE_ARB_geometry_shader4
+ /* initial geometry shader parameters */
+ shProg->Geom.VerticesOut = 0;
+ shProg->Geom.InputType = GL_TRIANGLES;
+ shProg->Geom.OutputType = GL_TRIANGLE_STRIP;
+ #endif
+ }
+ return shProg;
+ }
+
+
+ /**
+ * Clear (free) the shader program state that gets produced by linking.
+ *
+ * Drops the references to the vertex, fragment and geometry programs and
+ * frees the uniform and varying lists, resetting those pointers to NULL.
+ * The program object itself and its other fields are not touched.
+ */
+ void
+ _mesa_clear_shader_program_data(GLcontext *ctx,
+ struct gl_shader_program *shProg)
+ {
+ /* referencing NULL releases whatever program was held */
+ _mesa_reference_vertprog(ctx, &shProg->VertexProgram, NULL);
+ _mesa_reference_fragprog(ctx, &shProg->FragmentProgram, NULL);
+ _mesa_reference_geomprog(ctx, &shProg->GeometryProgram, NULL);
+
+ if (shProg->Uniforms) {
+ _mesa_free_uniform_list(shProg->Uniforms);
+ shProg->Uniforms = NULL;
+ }
+
+ if (shProg->Varying) {
+ _mesa_free_parameter_list(shProg->Varying);
+ shProg->Varying = NULL;
+ }
+ }
+
+
+ /**
+ * Free all the data that hangs off a shader program object, but not the
+ * object itself.
+ *
+ * In order: link products (via _mesa_clear_shader_program_data()), the
+ * attribute list, the references to attached shaders and the Shaders
+ * array, the info log, and the transform feedback varying names.  Each
+ * released pointer and count is reset.
+ */
+ void
+ _mesa_free_shader_program_data(GLcontext *ctx,
+ struct gl_shader_program *shProg)
+ {
+ GLuint i;
+
+ assert(shProg->Type == GL_SHADER_PROGRAM_MESA);
+
+ _mesa_clear_shader_program_data(ctx, shProg);
+
+ if (shProg->Attributes) {
+ _mesa_free_parameter_list(shProg->Attributes);
+ shProg->Attributes = NULL;
+ }
+
+ /* detach shaders */
+ for (i = 0; i < shProg->NumShaders; i++) {
+ _mesa_reference_shader(ctx, &shProg->Shaders[i], NULL);
+ }
+ shProg->NumShaders = 0;
+
+ if (shProg->Shaders) {
+ free(shProg->Shaders);
+ shProg->Shaders = NULL;
+ }
+
+ /* the info log is released with talloc_free(), unlike the arrays above */
+ if (shProg->InfoLog) {
- free(shProg);
++ talloc_free(shProg->InfoLog);
+ shProg->InfoLog = NULL;
+ }
+
+ /* Transform feedback varying vars */
+ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
+ free(shProg->TransformFeedback.VaryingNames[i]);
+ }
+ free(shProg->TransformFeedback.VaryingNames);
+ shProg->TransformFeedback.VaryingNames = NULL;
+ shProg->TransformFeedback.NumVarying = 0;
+ }
+
+
+ /**
+ * Free/delete a shader program object.
+ * Called via ctx->Driver.DeleteShaderProgram().
+ *
+ * First releases everything hanging off the program with
+ * _mesa_free_shader_program_data(), then frees the object itself with
+ * talloc_free().
+ */
+ static void
+ __mesa_delete_shader_program(GLcontext *ctx, struct gl_shader_program *shProg)
+ {
+ _mesa_free_shader_program_data(ctx, shProg);
+
++ talloc_free(shProg);
+ }
+
+
+ /**
+ * Find the GLSL program object with the given name.
+ *
+ * \return the program, or NULL when \p name is zero, is unknown, or names
+ *         a shader instead of a program.  No GL error is recorded.
+ */
+ struct gl_shader_program *
+ _mesa_lookup_shader_program(GLcontext *ctx, GLuint name)
+ {
+    struct gl_shader_program *found;
+
+    if (name == 0)
+       return NULL;
+
+    found = (struct gl_shader_program *)
+       _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+
+    /* Shaders and shader programs share one hash table, so a hit may be
+     * the wrong kind of object; the Type field tells them apart.
+     */
+    if (found != NULL && found->Type != GL_SHADER_PROGRAM_MESA)
+       return NULL;
+
+    return found;
+ }
+
+
+ /**
+  * Look up a GLSL program object, recording a GL error when it is not found.
+  *
+  * \param ctx     GL context whose shared ShaderObjects table is searched.
+  * \param name    object name to look up.
+  * \param caller  text reported with any error that is raised.
+  *
+  * \return the program object, or NULL after recording GL_INVALID_VALUE
+  * (zero or unknown name) or GL_INVALID_OPERATION (the name refers to an
+  * object that is not a shader program).
+  */
+ struct gl_shader_program *
+ _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name,
+                                 const char *caller)
+ {
+    /* \p caller is always passed through "%s" rather than used as the format
+     * string itself, so a '%' in it can never be read as a conversion.
+     */
+    if (!name) {
+       _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+       return NULL;
+    }
+    else {
+       struct gl_shader_program *shProg = (struct gl_shader_program *)
+          _mesa_HashLookup(ctx->Shared->ShaderObjects, name);
+       if (!shProg) {
+          _mesa_error(ctx, GL_INVALID_VALUE, "%s", caller);
+          return NULL;
+       }
+       /* Shaders and programs live in the same table; reject the wrong kind. */
+       if (shProg->Type != GL_SHADER_PROGRAM_MESA) {
+          _mesa_error(ctx, GL_INVALID_OPERATION, "%s", caller);
+          return NULL;
+       }
+       return shProg;
+    }
+ }
+
+
+ /**
+ * Install the shader-object create/delete hooks into a driver function table.
+ *
+ * \param driver table whose NewShader, DeleteShader, NewShaderProgram and
+ * DeleteShaderProgram entries are overwritten.
+ */
+ void
+ _mesa_init_shader_object_functions(struct dd_function_table *driver)
+ {
+ driver->NewShader = _mesa_new_shader;
+ driver->DeleteShader = __mesa_delete_shader;
+ driver->NewShaderProgram = _mesa_new_shader_program;
+ driver->DeleteShaderProgram = __mesa_delete_shader_program;
+ }
--- /dev/null
-#include "glheader.h"
-#include "mtypes.h"
+ /*
+ * Mesa 3-D graphics library
+ * Version: 6.5.3
+ *
+ * Copyright (C) 2004-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+ #ifndef SHADEROBJ_H
+ #define SHADEROBJ_H
+
+
++#include "main/glheader.h"
++#include "main/mtypes.h"
++#include "program/ir_to_mesa.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++/**
++ * Internal functions
++ */
++
++extern void
++_mesa_init_shader_state(GLcontext * ctx);
++
++extern void
++_mesa_free_shader_state(GLcontext *ctx);
+
+
+ extern void
+ _mesa_reference_shader(GLcontext *ctx, struct gl_shader **ptr,
+ struct gl_shader *sh);
+
+ extern struct gl_shader *
+ _mesa_lookup_shader(GLcontext *ctx, GLuint name);
+
+ extern struct gl_shader *
+ _mesa_lookup_shader_err(GLcontext *ctx, GLuint name, const char *caller);
+
+
+
+ extern void
+ _mesa_reference_shader_program(GLcontext *ctx,
+ struct gl_shader_program **ptr,
+ struct gl_shader_program *shProg);
+
++extern struct gl_shader *
++_mesa_new_shader(GLcontext *ctx, GLuint name, GLenum type);
++
+ extern struct gl_shader_program *
+ _mesa_lookup_shader_program(GLcontext *ctx, GLuint name);
+
+ extern struct gl_shader_program *
+ _mesa_lookup_shader_program_err(GLcontext *ctx, GLuint name,
+ const char *caller);
+
+ extern void
+ _mesa_clear_shader_program_data(GLcontext *ctx,
+ struct gl_shader_program *shProg);
+
+ extern void
+ _mesa_free_shader_program_data(GLcontext *ctx,
+ struct gl_shader_program *shProg);
+
+
+
+ extern void
+ _mesa_init_shader_object_functions(struct dd_function_table *driver);
+
+ /* NOTE(review): the two prototypes below repeat the declarations of
+ * _mesa_init_shader_state() and _mesa_free_shader_state() that already
+ * appear at the top of this header's extern "C" section.
+ */
+ extern void
+ _mesa_init_shader_state(GLcontext *ctx);
+
+ extern void
+ _mesa_free_shader_state(GLcontext *ctx);
+
++#ifdef __cplusplus
++};
++#endif
+
+ #endif /* SHADEROBJ_H */
--- /dev/null
+ /*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file hash_table.c
+ * \brief Implementation of a generic, opaque hash table data type.
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+
+ #include "main/imports.h"
+ #include "main/simple_list.h"
+ #include "hash_table.h"
+
+ /** Doubly linked list link; also used as the sentinel head of each bucket. */
+ struct node {
+ struct node *next;
+ struct node *prev;
+ };
+
+ /**
+ * The table itself: callbacks, bucket count, and the bucket list heads.
+ */
+ struct hash_table {
+ hash_func_t hash; /* maps a key to an unsigned hash value */
+ hash_compare_func_t compare; /* returns 0 when two keys match */
+
+ unsigned num_buckets;
+ /* Declared with one element; hash_table_ctor() allocates room for
+ * num_buckets of them.
+ */
+ struct node buckets[1];
+ };
+
+
+ /**
+ * One stored key/data pair.
+ *
+ * \c link is the first member so that the list code's struct node pointers
+ * can be cast to struct hash_node pointers.
+ */
+ struct hash_node {
+ struct node link;
+ const void *key;
+ void *data;
+ };
+
+
+ /**
+  * Allocate a hash table and initialise every bucket to an empty list.
+  *
+  * Requests for fewer than 16 buckets are rounded up to 16.
+  *
+  * \return the new table, or NULL when the allocation fails.
+  */
+ struct hash_table *
+ hash_table_ctor(unsigned num_buckets, hash_func_t hash,
+                 hash_compare_func_t compare)
+ {
+    struct hash_table *table;
+    unsigned b;
+
+    if (num_buckets < 16)
+       num_buckets = 16;
+
+    /* struct hash_table already holds one bucket; add room for the rest. */
+    table = malloc(sizeof(*table)
+                   + ((num_buckets - 1) * sizeof(table->buckets[0])));
+    if (table == NULL)
+       return NULL;
+
+    table->hash = hash;
+    table->compare = compare;
+    table->num_buckets = num_buckets;
+
+    for (b = 0; b < num_buckets; b++)
+       make_empty_list(& table->buckets[b]);
+
+    return table;
+ }
+
+
+ /**
+ * Destroy a hash table: remove and free every entry, then free the table.
+ * \p ht is dereferenced by hash_table_clear(), so it must not be NULL.
+ */
+ void
+ hash_table_dtor(struct hash_table *ht)
+ {
+ hash_table_clear(ht);
+ free(ht);
+ }
+
+
+ /**
+  * Remove and free every entry, leaving all buckets empty.
+  * Keys and data referenced by the entries are not freed.
+  */
+ void
+ hash_table_clear(struct hash_table *ht)
+ {
+    unsigned b;
+
+    for (b = 0; b < ht->num_buckets; b++) {
+       struct node *entry;
+       struct node *next_entry;
+
+       /* The "safe" iterator keeps the successor so the entry can be freed. */
+       foreach_s(entry, next_entry, & ht->buckets[b]) {
+          remove_from_list(entry);
+          free(entry);
+       }
+
+       assert(is_empty_list(& ht->buckets[b]));
+    }
+ }
+
+
+ /**
+  * Return the data stored under \p key, or NULL when no entry matches.
+  *
+  * Only the bucket selected by the key's hash is searched; the first entry
+  * whose key compares equal (compare callback returns 0) wins.
+  */
+ void *
+ hash_table_find(struct hash_table *ht, const void *key)
+ {
+    const unsigned slot = (*ht->hash)(key) % ht->num_buckets;
+    struct node *cursor;
+
+    foreach(cursor, & ht->buckets[slot]) {
+       struct hash_node *entry = (struct hash_node *) cursor;
+
+       if ((*ht->compare)(entry->key, key) != 0)
+          continue;
+
+       return entry->data;
+    }
+
+    return NULL;
+ }
+
+
+ /**
+  * Add a key/data pair to the table.
+  *
+  * The new entry is placed at the head of its bucket and existing entries are
+  * not checked, so inserting a key twice leaves both entries in the table.
+  * The key pointer is stored as-is, not copied.
+  *
+  * If the entry cannot be allocated, the table is left unchanged; the
+  * function returns void, so the failure cannot be reported to the caller.
+  */
+ void
+ hash_table_insert(struct hash_table *ht, void *data, const void *key)
+ {
+    const unsigned hash_value = (*ht->hash)(key);
+    const unsigned bucket = hash_value % ht->num_buckets;
+    struct hash_node *node;
+
+    node = calloc(1, sizeof(*node));
+    if (node == NULL) {
+       /* Out of memory: give up instead of dereferencing a NULL pointer. */
+       return;
+    }
+
+    node->data = data;
+    node->key = key;
+
+    insert_at_head(& ht->buckets[bucket], & node->link);
+ }
+
++/**
++ * Unlink and free the first entry in the key's bucket whose key compares
++ * equal to \p key.  Does nothing when there is no such entry.  The key and
++ * data referenced by the entry are not freed.
++ */
++void
++hash_table_remove(struct hash_table *ht, const void *key)
++{
++   const unsigned slot = (*ht->hash)(key) % ht->num_buckets;
++   struct node *cursor;
++
++   foreach(cursor, & ht->buckets[slot]) {
++      struct hash_node *entry = (struct hash_node *) cursor;
++
++      if ((*ht->compare)(entry->key, key) != 0)
++         continue;
++
++      /* Leave immediately: the iterator must not touch the freed node. */
++      remove_from_list(cursor);
++      free(cursor);
++      return;
++   }
++}
+
+ /**
+  * Hash a NUL-terminated string: start at 5381 and, for every character,
+  * multiply the running value by 33 and add the character.
+  */
+ unsigned
+ hash_table_string_hash(const void *key)
+ {
+    const char *p;
+    unsigned hash = 5381;
+
+    for (p = (const char *) key; *p != '\0'; p++)
+       hash = (hash * 33) + *p;
+
+    return hash;
+ }
++
++
++/**
++ * Hash a pointer by value: its integer representation divided by
++ * sizeof(void *), truncated to unsigned.  The pointee is never accessed.
++ */
++unsigned
++hash_table_pointer_hash(const void *key)
++{
++   const uintptr_t address = (uintptr_t) key;
++
++   return (unsigned)(address / sizeof(void *));
++}
++
++
++/**
++ * Compare two pointer keys by value.
++ *
++ * \return 0 when both keys are the same pointer, 1 otherwise.
++ */
++int
++hash_table_pointer_compare(const void *key1, const void *key2)
++{
++   if (key1 == key2)
++      return 0;
++
++   return 1;
++}
--- /dev/null
+ /*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file hash_table.h
+ * \brief Declarations for a generic, opaque hash table data type.
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+
+ #ifndef HASH_TABLE_H
+ #define HASH_TABLE_H
+
+ #include <string.h>
+
+ struct hash_table;
+
+ typedef unsigned (*hash_func_t)(const void *key);
+ typedef int (*hash_compare_func_t)(const void *key1, const void *key2);
+
++#ifdef __cplusplus
++extern "C" {
++#endif
++
+ /**
+ * Hash table constructor
+ *
+ * Creates a hash table with the specified number of buckets. The supplied
+ * \c hash and \c compare routines are used when adding elements to the table
+ * and when searching for elements in the table.
+ *
+ * \param num_buckets Number of buckets (bins) in the hash table.
+ * \param hash Function used to compute hash value of input keys.
+ * \param compare Function used to compare keys.
+ */
+ extern struct hash_table *hash_table_ctor(unsigned num_buckets,
+ hash_func_t hash, hash_compare_func_t compare);
+
+
+ /**
+ * Release all memory associated with a hash table
+ *
+ * \warning
+ * This function cannot release memory occupied either by keys or data.
+ */
+ extern void hash_table_dtor(struct hash_table *ht);
+
+
+ /**
+ * Flush all entries from a hash table
+ *
+ * \param ht Table to be cleared of its entries.
+ */
+ extern void hash_table_clear(struct hash_table *ht);
+
+
+ /**
+ * Search a hash table for a specific element
+ *
+ * \param ht Table to be searched
+ * \param key Key of the desired element
+ *
+ * \return
+ * The \c data value supplied to \c hash_table_insert when the element with
+ * the matching key was added. If no matching key exists in the table,
+ * \c NULL is returned.
+ */
+ extern void *hash_table_find(struct hash_table *ht, const void *key);
+
+
+ /**
+ * Add an element to a hash table
+ *
+ * \param ht Table to receive the element
+ * \param data Value later returned by \c hash_table_find for this key
+ * \param key Key of the element; the pointer is stored, not copied
+ *
+ * \note
+ * Existing entries with an equal key are not replaced. The new entry is
+ * placed at the head of its bucket, so it is the one found first.
+ */
+ extern void hash_table_insert(struct hash_table *ht, void *data,
+ const void *key);
+
++/**
++ * Remove a specific element from a hash table.
++ *
++ * \param ht Table to remove the element from
++ * \param key Key of the element to remove
++ *
++ * Only the first entry with a matching key is removed. If no entry
++ * matches, the table is left unchanged. Memory occupied by the key or the
++ * data is not released.
++ */
++extern void hash_table_remove(struct hash_table *ht, const void *key);
+
+ /**
+ * Compute hash value of a string
+ *
+ * Computes the hash value of a string using the DJB2 algorithm developed by
+ * Professor Daniel J. Bernstein. It was published on comp.lang.c once upon
+ * a time. I was unable to find the original posting in the archives.
+ *
+ * \param key Pointer to a NUL terminated string to be hashed.
+ *
+ * \sa hash_table_string_compare
+ */
+ extern unsigned hash_table_string_hash(const void *key);
+
+
+ /**
+ * Compare two strings used as keys
+ *
+ * This is just a macro wrapper around \c strcmp.
+ *
+ * \sa hash_table_string_hash
+ */
+ #define hash_table_string_compare ((hash_compare_func_t) strcmp)
+
++
++/**
++ * Compute hash value of a pointer
++ *
++ * \param key Pointer to be used as a hash key
++ *
++ * \note
++ * The memory pointed to by \c key is \b never accessed. The value of \c key
++ * itself is used as the hash key
++ *
++ * \sa hash_table_pointer_compare
++ */
++unsigned
++hash_table_pointer_hash(const void *key);
++
++
++/**
++ * Compare two pointers used as keys
++ *
++ * \sa hash_table_pointer_hash
++ */
++int
++hash_table_pointer_compare(const void *key1, const void *key2);
++
++#ifdef __cplusplus
++};
++#endif
+ #endif /* HASH_TABLE_H */
--- /dev/null
--- /dev/null
++/*
++ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
++ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
++ * Copyright © 2010 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ */
++
++/**
++ * \file ir_to_mesa.cpp
++ *
++ * Translates the IR to ARB_fragment_program text if possible,
++ * printing the result
++ */
++
++#include <stdio.h>
++#include "ir.h"
++#include "ir_visitor.h"
++#include "ir_print_visitor.h"
++#include "ir_expression_flattening.h"
++#include "glsl_types.h"
++#include "glsl_parser_extras.h"
++#include "../glsl/program.h"
++#include "ir_optimization.h"
++#include "ast.h"
++
++extern "C" {
++#include "main/mtypes.h"
++#include "main/shaderobj.h"
++#include "main/uniforms.h"
++#include "program/prog_instruction.h"
++#include "program/prog_optimize.h"
++#include "program/prog_print.h"
++#include "program/program.h"
++#include "program/prog_uniform.h"
++#include "program/prog_parameter.h"
++}
++
++/**
++ * This struct is a corresponding struct to Mesa prog_src_register, with
++ * wider fields.
++ */
++typedef struct ir_to_mesa_src_reg {
++ int file; /**< PROGRAM_* from Mesa */
++ int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
++ GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
++ int negate; /**< NEGATE_XYZW mask from mesa */
++ /**
++ * Register index should be offset by the integer in this reg.
++ * NULL when the register is not relatively addressed.
++ */
++ ir_to_mesa_src_reg *reladdr;
++} ir_to_mesa_src_reg;
++
++/**
++ * Destination-register counterpart of ir_to_mesa_src_reg: it carries a
++ * writemask and a condition mask where the source has swizzle and negate.
++ */
++typedef struct ir_to_mesa_dst_reg {
++ int file; /**< PROGRAM_* from Mesa */
++ int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
++ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
++ GLuint cond_mask:4; /**< set to COND_TR by the initializers in this file */
++ /**
++ * Register index should be offset by the integer in this reg.
++ * NULL when the register is not relatively addressed.
++ */
++ ir_to_mesa_src_reg *reladdr;
++} ir_to_mesa_dst_reg;
++
++extern ir_to_mesa_src_reg ir_to_mesa_undef;
++
++/**
++ * One instruction in the list built by ir_to_mesa_visitor: an opcode, one
++ * destination, up to three sources, and texture/function-call extras.
++ */
++class ir_to_mesa_instruction : public exec_node {
++public:
++ enum prog_opcode op;
++ ir_to_mesa_dst_reg dst_reg;
++ ir_to_mesa_src_reg src_reg[3];
++ /** Pointer to the ir source this tree came from for debugging */
++ ir_instruction *ir;
++ GLboolean cond_update;
++ int sampler; /**< sampler index */
++ int tex_target; /**< One of TEXTURE_*_INDEX */
++ GLboolean tex_shadow;
++
++ class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
++};
++
++/**
++ * List node recording the register file and index assigned to an
++ * ir_variable; looked up by ir_to_mesa_visitor::find_variable_storage().
++ */
++class variable_storage : public exec_node {
++public:
++ variable_storage(ir_variable *var, int file, int index)
++ : file(file), index(index), var(var)
++ {
++ /* empty */
++ }
++
++ int file; /* register file of the storage */
++ int index; /* register index within that file */
++ ir_variable *var; /* variable that maps to this, if any */
++};
++
++/**
++ * Per-signature bookkeeping for function calls emitted by the visitor.
++ */
++class function_entry : public exec_node {
++public:
++ /** The IR signature this entry describes. */
++ ir_function_signature *sig;
++
++ /**
++ * identifier of this function signature used by the program.
++ *
++ * At the point that Mesa instructions for function calls are
++ * generated, we don't know the address of the first instruction of
++ * the function body. So we make the BranchTarget that is called a
++ * small integer and rewrite them during set_branchtargets().
++ */
++ int sig_id;
++
++ /**
++ * Pointer to first instruction of the function body.
++ *
++ * Set during function body emits after main() is processed.
++ */
++ ir_to_mesa_instruction *bgn_inst;
++
++ /**
++ * Index of the first instruction of the function body in actual
++ * Mesa IR.
++ *
++ * Set after conversion from ir_to_mesa_instruction to prog_instruction.
++ */
++ int inst;
++
++ /** Storage for the return value. */
++ ir_to_mesa_src_reg return_reg;
++};
++
++/**
++ * IR visitor that appends ir_to_mesa_instruction nodes to \c instructions.
++ *
++ * Each visit method leaves the register holding the value it produced in
++ * \c result; callers read it after calling accept() on a sub-node.
++ */
++class ir_to_mesa_visitor : public ir_visitor {
++public:
++ ir_to_mesa_visitor();
++
++ function_entry *current_function;
++
++ GLcontext *ctx;
++ struct gl_program *prog;
++
++ /** Index that get_temp() hands out next. */
++ int next_temp;
++
++ variable_storage *find_variable_storage(ir_variable *var);
++
++ function_entry *get_function_signature(ir_function_signature *sig);
++
++ ir_to_mesa_src_reg get_temp(const glsl_type *type);
++ void reladdr_to_temp(ir_instruction *ir,
++ ir_to_mesa_src_reg *reg, int *num_reladdr);
++
++ struct ir_to_mesa_src_reg src_reg_for_float(float val);
++
++ /**
++ * \name Visit methods
++ *
++ * As typical for the visitor pattern, there must be one \c visit method for
++ * each concrete subclass of \c ir_instruction. Virtual base classes within
++ * the hierarchy should not have \c visit methods.
++ */
++ /*@{*/
++ virtual void visit(ir_variable *);
++ virtual void visit(ir_loop *);
++ virtual void visit(ir_loop_jump *);
++ virtual void visit(ir_function_signature *);
++ virtual void visit(ir_function *);
++ virtual void visit(ir_expression *);
++ virtual void visit(ir_swizzle *);
++ virtual void visit(ir_dereference_variable *);
++ virtual void visit(ir_dereference_array *);
++ virtual void visit(ir_dereference_record *);
++ virtual void visit(ir_assignment *);
++ virtual void visit(ir_constant *);
++ virtual void visit(ir_call *);
++ virtual void visit(ir_return *);
++ virtual void visit(ir_discard *);
++ virtual void visit(ir_texture *);
++ virtual void visit(ir_if *);
++ /*@}*/
++
++ /** Register holding the value of the most recently visited node. */
++ struct ir_to_mesa_src_reg result;
++
++ /** List of variable_storage */
++ exec_list variables;
++
++ /** List of function_entry */
++ exec_list function_signatures;
++ int next_signature_id;
++
++ /** List of ir_to_mesa_instruction */
++ exec_list instructions;
++
++ /* The op0/op1/op2 emitters forward to ir_to_mesa_emit_op3, passing
++ * ir_to_mesa_undef (or ir_to_mesa_undef_dst) for the unused operands.
++ */
++ ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
++ enum prog_opcode op);
++
++ ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0);
++
++ ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0,
++ ir_to_mesa_src_reg src1);
++
++ ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0,
++ ir_to_mesa_src_reg src1,
++ ir_to_mesa_src_reg src2);
++
++ /* Emitters for scalar-only opcodes: one instruction per distinct source
++ * channel combination (see ir_to_mesa_emit_scalar_op2).
++ */
++ void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0);
++
++ void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0,
++ ir_to_mesa_src_reg src1);
++
++ GLboolean try_emit_mad(ir_expression *ir,
++ int mul_operand);
++
++ /** Array indexed by location, grown on demand by map_sampler(). */
++ int *sampler_map;
++ int sampler_map_size;
++
++ void map_sampler(int location, int sampler);
++ int get_sampler_number(int location);
++
++ /** talloc context used for instructions and the sampler map. */
++ void *mem_ctx;
++};
++
++/** Source-register placeholder for "no operand" (file PROGRAM_UNDEFINED). */
++ir_to_mesa_src_reg ir_to_mesa_undef = {
++ PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, NULL,
++};
++
++/**
++ * Destination-register placeholder for "no destination".
++ *
++ * NOTE(review): the third initializer fills \c writemask, but SWIZZLE_NOOP is
++ * a swizzle constant rather than a WRITEMASK_* value -- confirm this is
++ * intended.
++ */
++ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
++ PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
++};
++
++/** Destination used when emitting OPCODE_ARL: PROGRAM_ADDRESS index 0, .x */
++ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
++ PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
++};
++
++/**
++ * Return the swizzle for a value with \p size components (1..4).
++ *
++ * Channels beyond \p size repeat the last valid component, e.g. size 2
++ * yields XYYY.  \p size must be in the range 1..4; this is asserted because
++ * the value is used as a table index.
++ */
++static int swizzle_for_size(int size)
++{
++   /* Constant table: built once instead of on every call. */
++   static const int size_swizzles[4] = {
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
++      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
++   };
++
++   assert(size >= 1 && size <= 4);
++
++   return size_swizzles[size - 1];
++}
++
++/**
++ * Append one instruction with up to three source operands to
++ * \c instructions and return it.
++ *
++ * Any relatively addressed operand is resolved first (see the comment in
++ * the body), so ARL and MOV instructions may be appended ahead of the
++ * requested one.
++ */
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0,
++ ir_to_mesa_src_reg src1,
++ ir_to_mesa_src_reg src2)
++{
++ ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
++ int num_reladdr = 0;
++
++ /* If we have to do relative addressing, we want to load the ARL
++ * reg directly for one of the regs, and preload the other reladdr
++ * sources into temps.
++ */
++ num_reladdr += dst.reladdr != NULL;
++ num_reladdr += src0.reladdr != NULL;
++ num_reladdr += src1.reladdr != NULL;
++ num_reladdr += src2.reladdr != NULL;
++
++ /* Sources are handled last-to-first; each call decrements num_reladdr
++ * when its operand was relatively addressed.
++ */
++ reladdr_to_temp(ir, &src2, &num_reladdr);
++ reladdr_to_temp(ir, &src1, &num_reladdr);
++ reladdr_to_temp(ir, &src0, &num_reladdr);
++
++ /* The destination cannot be copied to a temp, so its address is loaded
++ * last and used directly.
++ */
++ if (dst.reladdr) {
++ ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
++ *dst.reladdr);
++
++ num_reladdr--;
++ }
++ assert(num_reladdr == 0);
++
++ inst->op = op;
++ inst->dst_reg = dst;
++ inst->src_reg[0] = src0;
++ inst->src_reg[1] = src1;
++ inst->src_reg[2] = src2;
++ inst->ir = ir;
++
++ inst->function = NULL;
++
++ this->instructions.push_tail(inst);
++
++ return inst;
++}
++
++
++/**
++ * Emit a two-source instruction: forwards to ir_to_mesa_emit_op3() with
++ * ir_to_mesa_undef as the third source.
++ */
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0,
++ ir_to_mesa_src_reg src1)
++{
++ return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
++}
++
++/**
++ * Emit a one-source instruction: forwards to ir_to_mesa_emit_op3() with
++ * ir_to_mesa_undef as the second and third sources.
++ */
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
++ enum prog_opcode op,
++ ir_to_mesa_dst_reg dst,
++ ir_to_mesa_src_reg src0)
++{
++ return ir_to_mesa_emit_op3(ir, op, dst,
++ src0, ir_to_mesa_undef, ir_to_mesa_undef);
++}
++
++/**
++ * Emit an instruction with no operands: forwards to ir_to_mesa_emit_op3()
++ * with ir_to_mesa_undef_dst as destination and ir_to_mesa_undef for all
++ * three sources.
++ */
++ir_to_mesa_instruction *
++ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
++ enum prog_opcode op)
++{
++ return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
++ ir_to_mesa_undef,
++ ir_to_mesa_undef,
++ ir_to_mesa_undef);
++}
++
++/**
++ * Record that \p location uses sampler number \p sampler.
++ *
++ * The map is grown to location + 1 entries when \p location is past its
++ * current end; entries between the old end and \p location are not
++ * initialised here.
++ */
++void
++ir_to_mesa_visitor::map_sampler(int location, int sampler)
++{
++   const int needed = location + 1;
++
++   if (needed > this->sampler_map_size) {
++      this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
++                                         int, needed);
++      this->sampler_map_size = needed;
++   }
++
++   this->sampler_map[location] = sampler;
++}
++
++/**
++ * Return the entry of \c sampler_map at \p location, i.e. the array that
++ * map_sampler() fills in.
++ *
++ * Only the upper bound of \p location is asserted.
++ */
++int
++ir_to_mesa_visitor::get_sampler_number(int location)
++{
++ assert(location < this->sampler_map_size);
++ return this->sampler_map[location];
++}
++
++/**
++ * Build a destination register that targets the same file, index and
++ * relative address as \p reg, writing all four channels unconditionally.
++ * The source's swizzle and negate are not carried over.
++ */
++inline ir_to_mesa_dst_reg
++ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
++{
++   ir_to_mesa_dst_reg dst;
++
++   /* Fixed parts of every converted register. */
++   dst.writemask = WRITEMASK_XYZW;
++   dst.cond_mask = COND_TR;
++
++   /* Location copied from the source register. */
++   dst.file = reg.file;
++   dst.index = reg.index;
++   dst.reladdr = reg.reladdr;
++
++   return dst;
++}
++
++/**
++ * Build a source register that reads the same file, index and relative
++ * address as \p reg, with an identity (XYZW) swizzle and no negation.
++ * The destination's writemask and condition mask are not carried over.
++ */
++inline ir_to_mesa_src_reg
++ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
++{
++   ir_to_mesa_src_reg src;
++
++   /* Fixed parts of every converted register. */
++   src.swizzle = SWIZZLE_XYZW;
++   src.negate = 0;
++
++   /* Location copied from the destination register. */
++   src.file = reg.file;
++   src.index = reg.index;
++   src.reladdr = reg.reladdr;
++
++   return src;
++}
++
++/**
++ * Emit a scalar-only Mesa opcode so that every written channel gets its
++ * own answer.
++ *
++ * Scalar opcodes read the X channel of their sources and splat the result,
++ * like ARB_fp/vp.  For each destination channel still to be written, the
++ * sources are re-swizzled to broadcast that channel's component and one
++ * instruction is emitted.  Later channels that select exactly the same
++ * source components are folded into the same instruction's writemask.
++ */
++void
++ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg orig_src0,
++                                               ir_to_mesa_src_reg orig_src1)
++{
++   /* Destination channels that have not been produced yet. */
++   unsigned pending = dst.writemask & 0xf;
++
++   for (int chan = 0; chan < 4; chan++) {
++      const unsigned chan_bit = 1u << chan;
++
++      if (!(pending & chan_bit))
++         continue;
++
++      const GLuint swz0 = GET_SWZ(orig_src0.swizzle, chan);
++      const GLuint swz1 = GET_SWZ(orig_src1.swizzle, chan);
++      GLuint group_mask = chan_bit;
++
++      /* Collect later channels that would compute the identical value. */
++      for (int other = chan + 1; other < 4; other++) {
++         const unsigned other_bit = 1u << other;
++
++         if ((pending & other_bit) &&
++             GET_SWZ(orig_src0.swizzle, other) == swz0 &&
++             GET_SWZ(orig_src1.swizzle, other) == swz1) {
++            group_mask |= other_bit;
++         }
++      }
++
++      ir_to_mesa_src_reg src0 = orig_src0;
++      ir_to_mesa_src_reg src1 = orig_src1;
++      src0.swizzle = MAKE_SWIZZLE4(swz0, swz0, swz0, swz0);
++      src1.swizzle = MAKE_SWIZZLE4(swz1, swz1, swz1, swz1);
++
++      ir_to_mesa_instruction *inst =
++         ir_to_mesa_emit_op2(ir, op, dst, src0, src1);
++      inst->dst_reg.writemask = group_mask;
++
++      pending &= ~group_mask;
++   }
++}
++
++/**
++ * One-source form of ir_to_mesa_emit_scalar_op2(): the second source is an
++ * undefined register with an XXXX swizzle, so only \p src0's swizzle
++ * decides how channels are grouped.
++ */
++void
++ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
++                                               enum prog_opcode op,
++                                               ir_to_mesa_dst_reg dst,
++                                               ir_to_mesa_src_reg src0)
++{
++   ir_to_mesa_src_reg unused_src1 = ir_to_mesa_undef;
++   unused_src1.swizzle = SWIZZLE_XXXX;
++
++   ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, unused_src1);
++}
++
++/**
++ * Return a PROGRAM_CONSTANT source register holding \p val.
++ *
++ * The constant is added to the program's parameter list through
++ * _mesa_add_unnamed_constant(), which supplies both the index and the
++ * swizzle of the returned register.
++ */
++struct ir_to_mesa_src_reg
++ir_to_mesa_visitor::src_reg_for_float(float val)
++{
++   ir_to_mesa_src_reg constant;
++
++   constant.negate = 0;
++   constant.reladdr = NULL;
++   constant.file = PROGRAM_CONSTANT;
++   constant.index = _mesa_add_unnamed_constant(this->prog->Parameters,
++                                               &val, 1, &constant.swizzle);
++
++   return constant;
++}
++
++/**
++ * Number of vec4-sized registers needed to hold a value of \p type.
++ *
++ * Scalars and vectors take one register, matrices one per column, arrays
++ * element size times length, and structures the sum of their fields.
++ *
++ * \return the register count; 0 for a base type that is not handled (this
++ * also trips an assertion in debug builds).
++ */
++static int
++type_size(const struct glsl_type *type)
++{
++   unsigned int i;
++   int size;
++
++   switch (type->base_type) {
++   case GLSL_TYPE_UINT:
++   case GLSL_TYPE_INT:
++   case GLSL_TYPE_FLOAT:
++   case GLSL_TYPE_BOOL:
++      if (type->is_matrix()) {
++         return type->matrix_columns;
++      } else {
++         /* Regardless of size of vector, it gets a vec4. This is bad
++          * packing for things like floats, but otherwise arrays become a
++          * mess.  Hopefully a later pass over the code can pack scalars
++          * down if appropriate.
++          */
++         return 1;
++      }
++   case GLSL_TYPE_ARRAY:
++      return type_size(type->fields.array) * type->length;
++   case GLSL_TYPE_STRUCT:
++      size = 0;
++      for (i = 0; i < type->length; i++) {
++         size += type_size(type->fields.structure[i].type);
++      }
++      return size;
++   default:
++      assert(0);
++      /* With assertions compiled out, control would otherwise run off the
++       * end of a non-void function, which is undefined behaviour.
++       */
++      return 0;
++   }
++}
++
++/**
++ * Hand out a fresh PROGRAM_TEMPORARY register for a value of \p type.
++ *
++ * Temporaries are simply numbered during the first codegen pass (this is
++ * not SSA: variable assignments reuse storage); real register allocation
++ * for the Mesa VM happens later, in a pass over the Mesa IR.
++ *
++ * The returned swizzle selects components 0..vector_elements-1 in order and
++ * repeats the last one in the remaining channels.  Array types are not
++ * accepted (asserted).
++ */
++ir_to_mesa_src_reg
++ir_to_mesa_visitor::get_temp(const glsl_type *type)
++{
++   ir_to_mesa_src_reg temp;
++   int comp[4];
++
++   assert(!type->is_array());
++
++   temp.file = PROGRAM_TEMPORARY;
++   temp.index = next_temp;
++   temp.reladdr = NULL;
++   temp.negate = 0;
++
++   /* Reserve as many registers as the type occupies. */
++   next_temp += type_size(type);
++
++   int chan = 0;
++   while (chan < type->vector_elements) {
++      comp[chan] = chan;
++      chan++;
++   }
++   while (chan < 4) {
++      comp[chan] = type->vector_elements - 1;
++      chan++;
++   }
++   temp.swizzle = MAKE_SWIZZLE4(comp[0], comp[1], comp[2], comp[3]);
++
++   return temp;
++}
++
++/**
++ * Linear search of \c variables for the storage entry whose \c var is
++ * \p var.
++ *
++ * \return the matching entry, or NULL when the variable has none.
++ */
++variable_storage *
++ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
++{
++   foreach_iter(exec_list_iterator, it, this->variables) {
++      variable_storage *candidate = (variable_storage *)it.get();
++
++      if (candidate->var != var)
++         continue;
++
++      return candidate;
++   }
++
++   return NULL;
++}
++
++/** Variable declarations emit no instructions; the node is ignored. */
++void
++ir_to_mesa_visitor::visit(ir_variable *ir)
++{
++ (void)ir;
++}
++
++/**
++ * Emit a loop as BGNLOOP, the loop body, ENDLOOP.
++ *
++ * Loops that carry from/to/increment/counter fields are not handled here;
++ * each field is asserted to be unset.
++ */
++void
++ir_to_mesa_visitor::visit(ir_loop *ir)
++{
++ assert(!ir->from);
++ assert(!ir->to);
++ assert(!ir->increment);
++ assert(!ir->counter);
++
++ /* The loop markers are emitted with no originating IR pointer. */
++ ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
++ visit_exec_list(&ir->body_instructions, this);
++ ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
++}
++
++/**
++ * Emit OPCODE_BRK for a break and OPCODE_CONT for a continue.  Any other
++ * jump mode emits nothing.
++ */
++void
++ir_to_mesa_visitor::visit(ir_loop_jump *ir)
++{
++   if (ir->mode == ir_loop_jump::jump_break) {
++      ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
++   } else if (ir->mode == ir_loop_jump::jump_continue) {
++      ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
++   }
++}
++
++
++/**
++ * Not expected to be called: visit(ir_function *) iterates the body of the
++ * signature it selects itself, so reaching this method trips the assertion.
++ */
++void
++ir_to_mesa_visitor::visit(ir_function_signature *ir)
++{
++ assert(0);
++ (void)ir;
++}
++
++/**
++ * Generate code for main() and ignore every other function.
++ *
++ * Other functions are skipped on the expectation that their calls were all
++ * inlined before this pass runs.
++ */
++void
++ir_to_mesa_visitor::visit(ir_function *ir)
++{
++   if (strcmp(ir->name, "main") != 0)
++      return;
++
++   /* main() takes no parameters, so match against an empty list. */
++   exec_list no_params;
++   const ir_function_signature *main_sig = ir->matching_signature(&no_params);
++
++   assert(main_sig);
++
++   foreach_iter(exec_list_iterator, iter, main_sig->body) {
++      ir_instruction *stmt = (ir_instruction *)iter.get();
++
++      stmt->accept(this);
++   }
++}
++
++/**
++ * Try to emit \p ir (an add) as a single OPCODE_MAD.
++ *
++ * \param ir           the add expression being lowered.
++ * \param mul_operand  index (0 or 1) of the operand to test for being a
++ *                     multiply; the other operand becomes the addend.
++ *
++ * \return true when a MAD was emitted and \c result holds its destination;
++ * false when the chosen operand is not a multiply expression, in which case
++ * nothing has been emitted.
++ */
++GLboolean
++ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
++{
++   ir_expression *mul = ir->operands[mul_operand]->as_expression();
++
++   if (mul == NULL || mul->operation != ir_binop_mul)
++      return false;
++
++   /* Evaluate both factors, then the addend, in that order. */
++   mul->operands[0]->accept(this);
++   ir_to_mesa_src_reg factor0 = this->result;
++
++   mul->operands[1]->accept(this);
++   ir_to_mesa_src_reg factor1 = this->result;
++
++   ir->operands[1 - mul_operand]->accept(this);
++   ir_to_mesa_src_reg addend = this->result;
++
++   this->result = get_temp(ir->type);
++   ir_to_mesa_emit_op3(ir, OPCODE_MAD,
++                       ir_to_mesa_dst_reg_from_src(this->result),
++                       factor0, factor1, addend);
++
++   return true;
++}
++
++/**
++ * Resolve the relative addressing of one source operand.
++ *
++ * Does nothing when \p reg is not relatively addressed.  Otherwise the
++ * address register is loaded from \c reg->reladdr and, unless this is the
++ * last outstanding relatively addressed operand (*num_reladdr == 1), the
++ * operand is copied into a fresh vec4 temporary which replaces \p reg.
++ * \p num_reladdr is decremented in either case.
++ *
++ * \param ir           IR node recorded on the emitted instructions.
++ * \param reg          operand to resolve; may be overwritten with a temp.
++ * \param num_reladdr  count of relatively addressed operands still pending.
++ */
++void
++ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
++ ir_to_mesa_src_reg *reg, int *num_reladdr)
++{
++ if (!reg->reladdr)
++ return;
++
++ ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
++
++ /* The last pending operand keeps using the address register directly;
++ * every earlier one is read into a temporary now.
++ */
++ if (*num_reladdr != 1) {
++ ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
++
++ ir_to_mesa_emit_op1(ir, OPCODE_MOV,
++ ir_to_mesa_dst_reg_from_src(temp), *reg);
++ *reg = temp;
++ }
++
++ (*num_reladdr)--;
++}
++
++ /**
++  * Translate a GLSL IR expression into Mesa IR instructions.
++  *
++  * Each operand is visited first and its resulting source register is
++  * collected in op[].  A temporary of the expression's type is then
++  * allocated for the result and the instruction(s) matching ir->operation
++  * are emitted into it.  A few operations (negation, i2f/b2f/b2i) emit
++  * nothing and simply forward a (possibly modified) operand register.
++  *
++  * On return this->result holds the source register of the expression value.
++  * If an operand produces no result the operand is printed and the process
++  * exits.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_expression *ir)
++ {
++    unsigned int operand;
++    struct ir_to_mesa_src_reg op[2];
++    struct ir_to_mesa_src_reg result_src;
++    struct ir_to_mesa_dst_reg result_dst;
++    const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
++    const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
++    const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
++
++    /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
++     */
++    if (ir->operation == ir_binop_add) {
++       if (try_emit_mad(ir, 1))
++          return;
++       if (try_emit_mad(ir, 0))
++          return;
++    }
++
++    for (operand = 0; operand < ir->get_num_operands(); operand++) {
++       this->result.file = PROGRAM_UNDEFINED;
++       ir->operands[operand]->accept(this);
++       if (this->result.file == PROGRAM_UNDEFINED) {
++          ir_print_visitor v;
++          printf("Failed to get tree for expression operand:\n");
++          ir->operands[operand]->accept(&v);
++          exit(1);
++       }
++       op[operand] = this->result;
++
++       /* Matrix expression operands should have been broken down to vector
++        * operations already.
++        */
++       assert(!ir->operands[operand]->type->is_matrix());
++    }
++
++    this->result.file = PROGRAM_UNDEFINED;
++
++    /* Storage for our result.  Ideally for an assignment we'd be using
++     * the actual storage for the result here, instead.
++     */
++    result_src = get_temp(ir->type);
++    /* convenience for the emit functions below. */
++    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
++    /* Limit writes to the channels that will be used by result_src later.
++     * This does limit this temp's use as a temporary for multi-instruction
++     * sequences.
++     */
++    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
++
++    switch (ir->operation) {
++    case ir_unop_logic_not:
++       /* Booleans are compared against 0.0: "x == 0" yields the negation. */
++       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
++                           op[0], src_reg_for_float(0.0));
++       break;
++    case ir_unop_neg:
++       /* No instruction needed: flip the negate bits on the operand. */
++       op[0].negate = ~op[0].negate;
++       result_src = op[0];
++       break;
++    case ir_unop_abs:
++       ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
++       break;
++    case ir_unop_sign:
++       ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
++       break;
++    case ir_unop_rcp:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
++       break;
++
++    case ir_unop_exp:
++       /* exp(x) is emitted as pow(e, x). */
++       ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
++                                  src_reg_for_float(M_E), op[0]);
++       break;
++    case ir_unop_exp2:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
++       break;
++    case ir_unop_log:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
++       break;
++    case ir_unop_log2:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
++       break;
++    case ir_unop_sin:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
++       break;
++    case ir_unop_cos:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
++       break;
++
++    case ir_unop_dFdx:
++       ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
++       break;
++    case ir_unop_dFdy:
++       ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
++       break;
++
++    case ir_binop_add:
++       ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_sub:
++       ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
++       break;
++
++    case ir_binop_mul:
++       ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_div:
++       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
++       /* Explicit break so release builds do not fall into the mod case. */
++       break;
++    case ir_binop_mod:
++       assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
++       break;
++
++    case ir_binop_less:
++       ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_greater:
++       ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_lequal:
++       ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_gequal:
++       ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_equal:
++       ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_logic_xor:
++    case ir_binop_nequal:
++       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
++       break;
++
++    case ir_binop_logic_or:
++       /* This could be a saturated add and skip the SNE. */
++       ir_to_mesa_emit_op2(ir, OPCODE_ADD,
++                           result_dst,
++                           op[0], op[1]);
++
++       ir_to_mesa_emit_op2(ir, OPCODE_SNE,
++                           result_dst,
++                           result_src, src_reg_for_float(0.0));
++       break;
++
++    case ir_binop_logic_and:
++       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
++       ir_to_mesa_emit_op2(ir, OPCODE_MUL,
++                           result_dst,
++                           op[0], op[1]);
++       break;
++
++    case ir_binop_dot:
++       /* Pick the dot-product opcode matching the operand width. */
++       if (ir->operands[0]->type == vec4_type) {
++          assert(ir->operands[1]->type == vec4_type);
++          ir_to_mesa_emit_op2(ir, OPCODE_DP4,
++                              result_dst,
++                              op[0], op[1]);
++       } else if (ir->operands[0]->type == vec3_type) {
++          assert(ir->operands[1]->type == vec3_type);
++          ir_to_mesa_emit_op2(ir, OPCODE_DP3,
++                              result_dst,
++                              op[0], op[1]);
++       } else if (ir->operands[0]->type == vec2_type) {
++          assert(ir->operands[1]->type == vec2_type);
++          ir_to_mesa_emit_op2(ir, OPCODE_DP2,
++                              result_dst,
++                              op[0], op[1]);
++       }
++       break;
++
++    case ir_binop_cross:
++       ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
++       break;
++
++    case ir_unop_sqrt:
++       /* sqrt(x) is emitted as 1 / rsq(x). */
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
++       /* For incoming channels < 0, set the result to 0. */
++       ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
++                           op[0], src_reg_for_float(0.0), result_src);
++       break;
++    case ir_unop_rsq:
++       ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
++       break;
++    case ir_unop_i2f:
++    case ir_unop_b2f:
++    case ir_unop_b2i:
++       /* Mesa IR lacks types, ints are stored as truncated floats. */
++       result_src = op[0];
++       break;
++    case ir_unop_f2i:
++       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
++       break;
++    case ir_unop_f2b:
++    case ir_unop_i2b:
++       /* The boolean is "operand != 0".  The comparison has to read the
++        * operand; the freshly allocated result temp holds no value yet.
++        */
++       ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
++                           op[0], src_reg_for_float(0.0));
++       break;
++    case ir_unop_trunc:
++       ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
++       break;
++    case ir_unop_ceil:
++       /* ceil(x) == -floor(-x): negate on the way in and on the way out. */
++       op[0].negate = ~op[0].negate;
++       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
++       result_src.negate = ~result_src.negate;
++       break;
++    case ir_unop_floor:
++       ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
++       break;
++    case ir_unop_fract:
++       ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
++       break;
++
++    case ir_binop_min:
++       ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_max:
++       ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
++       break;
++    case ir_binop_pow:
++       ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
++       break;
++
++    case ir_unop_bit_not:
++    case ir_unop_u2f:
++    case ir_binop_lshift:
++    case ir_binop_rshift:
++    case ir_binop_bit_and:
++    case ir_binop_bit_xor:
++    case ir_binop_bit_or:
++       assert(!"GLSL 1.30 features unsupported");
++       break;
++    }
++
++    this->result = result_src;
++ }
++
++
++ /**
++  * Translate a swizzle that appears inside an expression.
++  *
++  * The swizzled value is visited first; the IR swizzle mask is then composed
++  * with whatever swizzle the resulting source register already carries.
++  * Channels beyond the width of the swizzle's type repeat its last channel.
++  *
++  * Swizzles on the left-hand side of an assignment are write masks and are
++  * dealt with by the ir_assignment handling, not here.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_swizzle *ir)
++ {
++    ir->val->accept(this);
++
++    ir_to_mesa_src_reg src_reg = this->result;
++    assert(src_reg.file != PROGRAM_UNDEFINED);
++
++    /* Source channel picked by each of the four mask slots. */
++    const int picked[4] = {
++       (int) ir->mask.x,
++       (int) ir->mask.y,
++       (int) ir->mask.z,
++       (int) ir->mask.w
++    };
++    const unsigned width = ir->type->vector_elements;
++    int composed[4];
++
++    for (unsigned chan = 0; chan < 4; chan++) {
++       if (chan < width) {
++          composed[chan] = GET_SWZ(src_reg.swizzle, picked[chan]);
++       } else {
++          /* Narrower than a vec4: smear the last valid channel. */
++          composed[chan] = composed[width - 1];
++       }
++    }
++
++    src_reg.swizzle = MAKE_SWIZZLE4(composed[0], composed[1],
++                                    composed[2], composed[3]);
++    this->result = src_reg;
++ }
++
++ /**
++  * Add state references for the four columns of one matrix.
++  *
++  * tokens[2] and tokens[3] are overwritten with the column number for each
++  * of the four references, because _mesa_add_state_reference doesn't appear
++  * to work for anything that isn't a single vec4 (so the token range has to
++  * be one column wide).  The four references are expected to land in
++  * consecutive parameter slots.
++  *
++  * \return position returned for the first column.
++  */
++ static int
++ add_matrix_ref(struct gl_program *prog, int *tokens)
++ {
++    int first_pos = -1;
++
++    for (int column = 0; column < 4; column++) {
++       tokens[2] = column;
++       tokens[3] = column;
++
++       const int pos = _mesa_add_state_reference(prog->Parameters,
++                                                 (gl_state_index *)tokens);
++       if (first_pos != -1) {
++          assert(first_pos + column == pos);
++       } else {
++          first_pos = pos;
++       }
++    }
++
++    return first_pos;
++ }
++
++ /**
++  * Create state-variable storage for a built-in GLSL matrix uniform.
++  *
++  * \param mem_ctx      Allocation context for the returned variable_storage.
++  * \param prog         Program whose parameter list receives the state
++  *                     references.
++  * \param var          Variable being referenced; matched by name against the
++  *                     table of built-in matrices below.
++  * \param array_index  Index expression used on gl_TextureMatrix*, or NULL
++  *                     when the variable is referenced without an index.
++  *                     Ignored for the non-array matrices.
++  *
++  * \return storage in PROGRAM_STATE_VAR pointing at the first column of the
++  *         referenced matrix, or NULL if \p var is not a built-in matrix.
++  *
++  * For texture matrices a constant index references just that unit's matrix.
++  * A non-constant or absent index references the matrices of every array
++  * element, which are expected to land in consecutive parameter slots.
++  */
++ static variable_storage *
++ get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
++                        ir_rvalue *array_index)
++ {
++    /*
++     * NOTE: The ARB_vertex_program extension specified that matrices get
++     * loaded in registers in row-major order.  With GLSL, we want column-
++     * major order.  So, we need to transpose all matrices here...
++     */
++    static const struct {
++       const char *name;
++       int matrix;
++       int modifier;
++    } matrices[] = {
++       { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
++       { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
++       { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
++       { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
++
++       { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
++       { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
++       { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
++       { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
++
++       { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
++       { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
++       { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
++       { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
++
++       { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
++       { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
++       { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
++       { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
++
++       { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
++
++    };
++    unsigned int i;
++    variable_storage *entry;
++
++    /* C++ gets angry when we try to use an int as a gl_state_index, so we use
++     * ints for gl_state_index.  Make sure they're compatible.
++     */
++    assert(sizeof(gl_state_index) == sizeof(int));
++
++    for (i = 0; i < Elements(matrices); i++) {
++       if (strcmp(var->name, matrices[i].name) == 0) {
++          int tokens[STATE_LENGTH];
++          int base_pos = -1;
++
++          tokens[0] = matrices[i].matrix;
++          tokens[4] = matrices[i].modifier;
++          if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
++             /* A missing index expression is treated like a non-constant
++              * one: every element of the array gets referenced.
++              */
++             ir_constant *index = array_index
++                ? array_index->constant_expression_value() : NULL;
++             if (index) {
++                tokens[1] = index->value.i[0];
++                base_pos = add_matrix_ref(prog, tokens);
++             } else {
++                /* Separate counter: the outer loop index must stay intact. */
++                unsigned int unit;
++
++                for (unit = 0; unit < var->type->length; unit++) {
++                   tokens[1] = unit;
++                   int pos = add_matrix_ref(prog, tokens);
++                   if (base_pos == -1)
++                      base_pos = pos;
++                   else
++                      assert(base_pos + (int)unit * 4 == pos);
++                }
++             }
++          } else {
++             tokens[1] = 0; /* unused array index */
++             base_pos = add_matrix_ref(prog, tokens);
++          }
++
++          entry = new(mem_ctx) variable_storage(var,
++                                                PROGRAM_STATE_VAR,
++                                                base_pos);
++
++          return entry;
++       }
++    }
++
++    return NULL;
++ }
++
++ /**
++  * Produce a source register for a plain variable dereference.
++  *
++  * Storage for the variable is looked up first.  If none exists yet it is
++  * created according to the variable's mode:
++  *  - uniforms become a built-in matrix state reference, a sampler, or a
++  *    named uniform parameter (which is marked as used);
++  *  - in/inout variables map to PROGRAM_INPUT and out variables to
++  *    PROGRAM_OUTPUT at the location already stored on the variable;
++  *  - auto/temporary variables get the next free temporary registers.
++  *
++  * The process exits if no storage could be created.  On return
++  * this->result refers to the variable's storage.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
++ {
++    ir_variable *const var = ir->var;
++    variable_storage *entry = find_variable_storage(var);
++
++    if (entry == NULL) {
++       switch (var->mode) {
++       case ir_var_uniform:
++          entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, var, NULL);
++          if (entry != NULL) {
++             /* Built-in matrix: storage is the state reference itself. */
++          } else if (var->type->base_type == GLSL_TYPE_SAMPLER) {
++             /* FINISHME: Fix up uniform name for arrays and things */
++             /* FINISHME: we whack the location of the var here, which
++              * is probably not expected.  But we need to communicate
++              * mesa's sampler number to the tex instruction.
++              */
++             const int sampler_index = _mesa_add_sampler(this->prog->Parameters,
++                                                         var->name,
++                                                         var->type->gl_type);
++             map_sampler(var->location, sampler_index);
++
++             entry = new(mem_ctx) variable_storage(var, PROGRAM_SAMPLER,
++                                                   sampler_index);
++             this->variables.push_tail(entry);
++          } else {
++             assert(var->type->gl_type != 0 &&
++                    var->type->gl_type != GL_INVALID_ENUM);
++
++             const unsigned int uniform_loc =
++                _mesa_add_uniform(this->prog->Parameters,
++                                  var->name,
++                                  type_size(var->type) * 4,
++                                  var->type->gl_type,
++                                  NULL);
++
++             /* Always mark the uniform used at this point.  If it isn't
++              * used, dead code elimination should have nuked the decl already.
++              */
++             this->prog->Parameters->Parameters[uniform_loc].Used = GL_TRUE;
++
++             entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
++                                                   uniform_loc);
++             this->variables.push_tail(entry);
++          }
++          break;
++
++       case ir_var_in:
++       case ir_var_out:
++       case ir_var_inout:
++          /* The linker assigns locations for varyings and attributes,
++           * including deprecated builtins (like gl_Color), user-assign
++           * generic attributes (glBindVertexLocation), and
++           * user-defined varyings.
++           *
++           * FINISHME: We would hit this path for function arguments.  Fix!
++           */
++          assert(var->location != -1);
++          if (var->mode == ir_var_out) {
++             entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT,
++                                                   var->location);
++          } else {
++             entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT,
++                                                   var->location);
++
++             /* Generic vertex attributes are also recorded by name. */
++             const bool generic_vertex_attrib =
++                this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
++                var->location >= VERT_ATTRIB_GENERIC0;
++             if (generic_vertex_attrib) {
++                _mesa_add_attribute(prog->Attributes,
++                                    var->name,
++                                    type_size(var->type) * 4,
++                                    var->type->gl_type,
++                                    var->location - VERT_ATTRIB_GENERIC0);
++             }
++          }
++          break;
++
++       case ir_var_auto:
++       case ir_var_temporary:
++          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
++                                                this->next_temp);
++          this->variables.push_tail(entry);
++
++          next_temp += type_size(var->type);
++          break;
++       }
++
++       if (entry == NULL) {
++          printf("Failed to make storage for %s\n", var->name);
++          exit(1);
++       }
++    }
++
++    ir_to_mesa_src_reg src_reg;
++
++    src_reg.file = entry->file;
++    src_reg.index = entry->index;
++    src_reg.reladdr = NULL;
++    src_reg.negate = 0;
++    /* If the type is smaller than a vec4, replicate the last channel out. */
++    if (ir->type->is_scalar() || ir->type->is_vector())
++       src_reg.swizzle = swizzle_for_size(var->type->vector_elements);
++    else
++       src_reg.swizzle = SWIZZLE_NOOP;
++
++    this->result = src_reg;
++ }
++
++ /**
++  * Produce a source register for an array (or matrix column) dereference.
++  *
++  * Variables whose name starts with "gl_TextureMatrix" are resolved through
++  * get_builtin_matrix_ref.  Everything else visits the array first and then
++  * offsets the resulting register.
++  *
++  * A constant index is folded into the register index (general path) or
++  * leaves the register without a relative address (texture matrix path).
++  * A variable index is scaled by the element size in registers, where
++  * needed, and attached to the result as a relative address.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
++ {
++    ir_dereference_variable *const array_var =
++       ir->array->as_dereference_variable();
++    const int element_size = type_size(ir->type);
++    ir_constant *const const_index =
++       ir->array_index->constant_expression_value();
++
++    const bool is_texture_matrix =
++       array_var != NULL &&
++       strncmp(array_var->var->name, "gl_TextureMatrix",
++               strlen("gl_TextureMatrix")) == 0;
++
++    if (is_texture_matrix) {
++       ir_to_mesa_src_reg matrix_reg;
++       struct variable_storage *const storage =
++          get_builtin_matrix_ref(this->mem_ctx, this->prog, array_var->var,
++                                 ir->array_index);
++       assert(storage);
++
++       matrix_reg.file = storage->file;
++       matrix_reg.index = storage->index;
++       matrix_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
++       matrix_reg.negate = 0;
++
++       if (const_index != NULL) {
++          matrix_reg.reladdr = NULL;
++       } else {
++          /* Scale the runtime index by the element size. */
++          ir_to_mesa_src_reg scaled = get_temp(glsl_type::float_type);
++
++          ir->array_index->accept(this);
++          ir_to_mesa_emit_op2(ir, OPCODE_MUL,
++                              ir_to_mesa_dst_reg_from_src(scaled),
++                              this->result, src_reg_for_float(element_size));
++
++          matrix_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
++          memcpy(matrix_reg.reladdr, &scaled, sizeof(scaled));
++       }
++
++       this->result = matrix_reg;
++       return;
++    }
++
++    ir->array->accept(this);
++    ir_to_mesa_src_reg src_reg = this->result;
++
++    if (const_index != NULL) {
++       src_reg.index += const_index->value.i[0] * element_size;
++    } else {
++       /* Variable index array dereference.  It eats the "vec4" of the
++        * base of the array and an index that offsets the Mesa register
++        * index.
++        */
++       ir->array_index->accept(this);
++
++       ir_to_mesa_src_reg offset_reg;
++
++       if (element_size != 1) {
++          offset_reg = get_temp(glsl_type::float_type);
++
++          ir_to_mesa_emit_op2(ir, OPCODE_MUL,
++                              ir_to_mesa_dst_reg_from_src(offset_reg),
++                              this->result, src_reg_for_float(element_size));
++       } else {
++          /* One register per element: the index can be used unscaled. */
++          offset_reg = this->result;
++       }
++
++       src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
++       memcpy(src_reg.reladdr, &offset_reg, sizeof(offset_reg));
++    }
++
++    /* If the type is smaller than a vec4, replicate the last channel out. */
++    const bool vec_like = ir->type->is_scalar() || ir->type->is_vector();
++    src_reg.swizzle = vec_like ? swizzle_for_size(ir->type->vector_elements)
++                               : SWIZZLE_NOOP;
++
++    this->result = src_reg;
++ }
++
++ /**
++  * Produce a source register for a structure field dereference.
++  *
++  * The record is visited first; the register index of its result is then
++  * advanced by the combined size of all fields that precede the named one,
++  * and the swizzle is set for the width of the field's type.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_dereference_record *ir)
++ {
++    const glsl_type *const record_type = ir->record->type;
++
++    ir->record->accept(this);
++
++    /* Sum the sizes of the fields in front of the one being selected. */
++    int field_offset = 0;
++    unsigned int field = 0;
++    while (field < record_type->length &&
++           strcmp(record_type->fields.structure[field].name, ir->field) != 0) {
++       field_offset += type_size(record_type->fields.structure[field].type);
++       field++;
++    }
++
++    this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
++    this->result.index += field_offset;
++ }
++
++/**
++ * We want to be careful in assignment setup to hit the actual storage
++ * instead of potentially using a temporary like we might with the
++ * ir_dereference handler.
++ *
++ * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
++ * should only see potentially one variable array index of a vector,
++ * and one swizzle, before getting to actual vec4 storage. So handle
++ * those, then go use ir_dereference to handle the rest.
++ */
++static struct ir_to_mesa_dst_reg
++get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v,
++ ir_to_mesa_src_reg *r)
++{
++ struct ir_to_mesa_dst_reg dst_reg;
++ ir_swizzle *swiz;
++
++ ir_dereference_array *deref_array = ir->as_dereference_array();
++ /* This should have been handled by ir_vec_index_to_cond_assign */
++ if (deref_array) {
++ assert(!deref_array->array->type->is_vector());
++ }
++
++ /* Use the rvalue deref handler for the most part. We'll ignore
++ * swizzles in it and write swizzles using writemask, though.
++ */
++ ir->accept(v);
++ dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
++
++ if ((swiz = ir->as_swizzle())) {
++ int swizzles[4] = {
++ swiz->mask.x,
++ swiz->mask.y,
++ swiz->mask.z,
++ swiz->mask.w
++ };
++ int new_r_swizzle[4];
++ int orig_r_swizzle = r->swizzle;
++ int i;
++
++ for (i = 0; i < 4; i++) {
++ new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0);
++ }
++
++ dst_reg.writemask = 0;
++ for (i = 0; i < 4; i++) {
++ if (i < swiz->mask.num_components) {
++ dst_reg.writemask |= 1 << swizzles[i];
++ new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i);
++ }
++ }
++
++ r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0],
++ new_r_swizzle[1],
++ new_r_swizzle[2],
++ new_r_swizzle[3]);
++ }
++
++ return dst_reg;
++}
++
++ /**
++  * Translate an assignment, optionally predicated on a condition.
++  *
++  * The right-hand side is evaluated first, then the left-hand side storage
++  * is resolved.  One instruction is emitted per register of the left-hand
++  * side's type: a MOV for plain assignments, or a CMP that selects between
++  * the new and the current value for conditional ones.
++  *
++  * Array-typed left-hand sides are not expected here.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_assignment *ir)
++ {
++    assert(!ir->lhs->type->is_array());
++
++    ir->rhs->accept(this);
++    struct ir_to_mesa_src_reg rhs = this->result;
++
++    struct ir_to_mesa_dst_reg lhs = get_assignment_lhs(ir->lhs, this, &rhs);
++
++    assert(lhs.file != PROGRAM_UNDEFINED);
++    assert(rhs.file != PROGRAM_UNDEFINED);
++
++    if (!ir->condition) {
++       for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
++          ir_to_mesa_emit_op1(ir, OPCODE_MOV, lhs, rhs);
++          lhs.index++;
++          rhs.index++;
++       }
++       return;
++    }
++
++    ir->condition->accept(this);
++    ir_to_mesa_src_reg predicate = this->result;
++
++    /* OPCODE_CMP computes (a < 0 ? b : c) and the condition we produced
++     * is 0.0 or 1.0.  Flipping its sign lets CMP pick the new value when
++     * the condition holds, without computing the condition a second time.
++     */
++    predicate.negate = ~predicate.negate;
++
++    for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
++       ir_to_mesa_emit_op3(ir, OPCODE_CMP, lhs,
++                           predicate, rhs, ir_to_mesa_src_reg_from_dst(lhs));
++       lhs.index++;
++       rhs.index++;
++    }
++ }
++
++
++ /**
++  * Produce a source register holding a constant value.
++  *
++  * Scalars and vectors are added to the program's parameter list as a single
++  * unnamed constant; uint, int and bool components are converted to float
++  * first.
++  *
++  * Matrices are built column by column: each column is added as an unnamed
++  * constant and moved into consecutive registers of a temporary sized for
++  * the whole matrix, and that temporary becomes the result.
++  *
++  * Array constants are not supported yet; the constant is printed and an
++  * assertion fires.
++  *
++  * On return this->result refers to the constant (or the matrix temporary).
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_constant *ir)
++ {
++    ir_to_mesa_src_reg src_reg;
++    GLfloat stack_vals[4];
++    GLfloat *values = stack_vals;
++    unsigned int i;
++
++    if (ir->type->is_array()) {
++       ir->print();
++       printf("\n");
++       assert(!"FINISHME: array constants");
++    }
++
++    if (ir->type->is_matrix()) {
++       /* Unfortunately, 4 floats is all we can get into
++        * _mesa_add_unnamed_constant.  So, make a temp to store the
++        * matrix and move each constant value into it.  If we get
++        * lucky, copy propagation will eliminate the extra moves.
++        *
++        * The temp is allocated with the matrix type so that it covers one
++        * register per column; the loop below writes that many registers.
++        */
++       ir_to_mesa_src_reg mat = get_temp(ir->type);
++       ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
++
++       for (i = 0; i < ir->type->matrix_columns; i++) {
++          src_reg.file = PROGRAM_CONSTANT;
++
++          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
++          values = &ir->value.f[i * ir->type->vector_elements];
++
++          src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
++                                                     values,
++                                                     ir->type->vector_elements,
++                                                     &src_reg.swizzle);
++          src_reg.reladdr = NULL;
++          src_reg.negate = 0;
++          ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
++
++          mat_column.index++;
++       }
++
++       this->result = mat;
++       /* Done: do not fall into the vector path below, which would add a
++        * stray constant and replace the result with it.
++        */
++       return;
++    }
++
++    src_reg.file = PROGRAM_CONSTANT;
++    switch (ir->type->base_type) {
++    case GLSL_TYPE_FLOAT:
++       values = &ir->value.f[0];
++       break;
++    case GLSL_TYPE_UINT:
++       for (i = 0; i < ir->type->vector_elements; i++) {
++          values[i] = ir->value.u[i];
++       }
++       break;
++    case GLSL_TYPE_INT:
++       for (i = 0; i < ir->type->vector_elements; i++) {
++          values[i] = ir->value.i[i];
++       }
++       break;
++    case GLSL_TYPE_BOOL:
++       for (i = 0; i < ir->type->vector_elements; i++) {
++          values[i] = ir->value.b[i];
++       }
++       break;
++    default:
++       assert(!"Non-float/uint/int/bool constant");
++    }
++
++    src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
++                                               values, ir->type->vector_elements,
++                                               &src_reg.swizzle);
++    src_reg.reladdr = NULL;
++    src_reg.negate = 0;
++
++    this->result = src_reg;
++ }
++
++ /**
++  * Look up, or create on first use, the bookkeeping entry for a function
++  * signature.
++  *
++  * A new entry receives the next signature id, temporary storage for every
++  * parameter of the signature, and a return register when the signature has
++  * a return type.  The entry is then appended to function_signatures.
++  *
++  * \param sig  Signature to look up.
++  * \return the entry associated with \p sig.
++  */
++ function_entry *
++ ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
++ {
++    function_entry *entry;
++
++    foreach_iter(exec_list_iterator, iter, this->function_signatures) {
++       entry = (function_entry *)iter.get();
++
++       if (entry->sig == sig)
++          return entry;
++    }
++
++    entry = talloc(mem_ctx, function_entry);
++    entry->sig = sig;
++    entry->sig_id = this->next_signature_id++;
++    entry->bgn_inst = NULL;
++
++    /* Allocate storage for all the parameters.  Every parameter needs its
++     * own storage: callers look it up for each argument they pass.
++     */
++    foreach_iter(exec_list_iterator, iter, sig->parameters) {
++       ir_variable *param = (ir_variable *)iter.get();
++       variable_storage *storage;
++
++       storage = find_variable_storage(param);
++       assert(!storage);
++
++       storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
++                                               this->next_temp);
++       this->variables.push_tail(storage);
++
++       this->next_temp += type_size(param->type);
++    }
++
++    if (sig->return_type) {
++       entry->return_reg = get_temp(sig->return_type);
++    } else {
++       entry->return_reg = ir_to_mesa_undef;
++    }
++
++    this->function_signatures.push_tail(entry);
++    return entry;
++ }
++
++ /**
++  * Translate a function call.
++  *
++  * Actual arguments for in/inout parameters are copied into the callee's
++  * parameter storage, a CAL instruction tagged with the callee's
++  * function_entry is emitted, and afterwards the storage of out/inout
++  * parameters is copied back to the actual arguments.
++  *
++  * On return this->result is the callee's return register.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_call *ir)
++ {
++    ir_function_signature *const callee = ir->get_callee();
++    function_entry *const entry = get_function_signature(callee);
++    int reg;
++
++    /* Process in parameters. */
++    exec_list_iterator formal_iter = callee->parameters.iterator();
++    foreach_iter(exec_list_iterator, actual_iter, *ir) {
++       ir_rvalue *actual = (ir_rvalue *)actual_iter.get();
++       ir_variable *formal = (ir_variable *)formal_iter.get();
++       const bool copy_in = formal->mode == ir_var_in ||
++                            formal->mode == ir_var_inout;
++
++       if (copy_in) {
++          variable_storage *formal_storage = find_variable_storage(formal);
++          assert(formal_storage);
++
++          actual->accept(this);
++          ir_to_mesa_src_reg from = this->result;
++
++          ir_to_mesa_dst_reg to;
++          to.file = formal_storage->file;
++          to.index = formal_storage->index;
++          to.reladdr = NULL;
++          to.writemask = WRITEMASK_XYZW;
++          to.cond_mask = COND_TR;
++
++          /* One MOV per register occupied by the parameter. */
++          for (reg = 0; reg < type_size(formal->type); reg++) {
++             ir_to_mesa_emit_op1(ir, OPCODE_MOV, to, from);
++             to.index++;
++             from.index++;
++          }
++       }
++
++       formal_iter.next();
++    }
++    assert(!formal_iter.has_next());
++
++    /* Emit call instruction */
++    ir_to_mesa_instruction *const call_inst =
++       ir_to_mesa_emit_op1(ir, OPCODE_CAL,
++                           ir_to_mesa_undef_dst, ir_to_mesa_undef);
++    call_inst->function = entry;
++
++    /* Process out parameters. */
++    formal_iter = callee->parameters.iterator();
++    foreach_iter(exec_list_iterator, actual_iter, *ir) {
++       ir_rvalue *actual = (ir_rvalue *)actual_iter.get();
++       ir_variable *formal = (ir_variable *)formal_iter.get();
++       const bool copy_out = formal->mode == ir_var_out ||
++                             formal->mode == ir_var_inout;
++
++       if (copy_out) {
++          variable_storage *formal_storage = find_variable_storage(formal);
++          assert(formal_storage);
++
++          ir_to_mesa_src_reg from;
++          from.file = formal_storage->file;
++          from.index = formal_storage->index;
++          from.reladdr = NULL;
++          from.swizzle = SWIZZLE_NOOP;
++          from.negate = 0;
++
++          actual->accept(this);
++          ir_to_mesa_dst_reg to = ir_to_mesa_dst_reg_from_src(this->result);
++
++          for (reg = 0; reg < type_size(formal->type); reg++) {
++             ir_to_mesa_emit_op1(ir, OPCODE_MOV, to, from);
++             to.index++;
++             from.index++;
++          }
++       }
++
++       formal_iter.next();
++    }
++    assert(!formal_iter.has_next());
++
++    /* Process return value. */
++    this->result = entry->return_reg;
++ }
++
++
++ /**
++  * Translate a texture lookup.
++  *
++  * The coordinate is copied into a vec4 temporary, which is then patched up
++  * as required: the projector goes into .w (plain TEX becomes TXP) or is
++  * divided out (other opcodes), the shadow comparitor goes into .z, and the
++  * LOD or LOD bias goes into .w for TXL/TXB.  The sampler dereference
++  * supplies the instruction's sampler number and texture target.
++  *
++  * Only ir_tex, ir_txb and ir_txl are supported; ir_txd and ir_txf assert.
++  * On return this->result is the vec4 temporary receiving the texel.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_texture *ir)
++ {
++    ir_to_mesa_src_reg result_src, coord, lod_info = { 0 }, projector;
++    ir_to_mesa_dst_reg result_dst, coord_dst;
++    ir_to_mesa_instruction *tex_inst = NULL;
++    prog_opcode opcode = OPCODE_NOP;
++
++    ir->coordinate->accept(this);
++
++    /* Put our coords in a temp.  We'll need to modify them for shadow,
++     * projection, or LOD, so the only case we'd use it as is is if
++     * we're doing plain old texturing.  Mesa IR optimization should
++     * handle cleaning up our mess in that case.
++     */
++    coord = get_temp(glsl_type::vec4_type);
++    coord_dst = ir_to_mesa_dst_reg_from_src(coord);
++    ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
++
++    const bool projected = ir->projector != NULL;
++    if (projected) {
++       ir->projector->accept(this);
++       projector = this->result;
++    }
++
++    /* Storage for our result.  Ideally for an assignment we'd be using
++     * the actual storage for the result here, instead.
++     */
++    result_src = get_temp(glsl_type::vec4_type);
++    result_dst = ir_to_mesa_dst_reg_from_src(result_src);
++
++    /* Select the opcode and fetch the LOD operand where there is one. */
++    if (ir->op == ir_tex) {
++       opcode = OPCODE_TEX;
++    } else if (ir->op == ir_txb) {
++       opcode = OPCODE_TXB;
++       ir->lod_info.bias->accept(this);
++       lod_info = this->result;
++    } else if (ir->op == ir_txl) {
++       opcode = OPCODE_TXL;
++       ir->lod_info.lod->accept(this);
++       lod_info = this->result;
++    } else if (ir->op == ir_txd || ir->op == ir_txf) {
++       assert(!"GLSL 1.30 features unsupported");
++    }
++
++    if (projected && opcode == OPCODE_TEX) {
++       /* Slot the projector in as the last component of the coord. */
++       coord_dst.writemask = WRITEMASK_W;
++       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
++       coord_dst.writemask = WRITEMASK_XYZW;
++       opcode = OPCODE_TXP;
++    } else if (projected) {
++       ir_to_mesa_src_reg coord_w = coord;
++       coord_w.swizzle = SWIZZLE_WWWW;
++
++       /* For the other TEX opcodes there's no projective version
++        * since the last slot is taken up by lod info.  Do the
++        * projective divide now.
++        */
++       coord_dst.writemask = WRITEMASK_W;
++       ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
++
++       coord_dst.writemask = WRITEMASK_XYZ;
++       ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
++
++       coord_dst.writemask = WRITEMASK_XYZW;
++       coord.swizzle = SWIZZLE_XYZW;
++    }
++
++    const bool shadowed = ir->shadow_comparitor != NULL;
++    if (shadowed) {
++       /* Slot the shadow value in as the second to last component of the
++        * coord.
++        */
++       ir->shadow_comparitor->accept(this);
++       coord_dst.writemask = WRITEMASK_Z;
++       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
++       coord_dst.writemask = WRITEMASK_XYZW;
++    }
++
++    if (opcode == OPCODE_TXB || opcode == OPCODE_TXL) {
++       /* Mesa IR stores lod or lod bias in the last channel of the coords. */
++       coord_dst.writemask = WRITEMASK_W;
++       ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
++       coord_dst.writemask = WRITEMASK_XYZW;
++    }
++
++    tex_inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
++
++    if (shadowed)
++       tex_inst->tex_shadow = GL_TRUE;
++
++    ir_dereference_variable *sampler = ir->sampler->as_dereference_variable();
++    assert(sampler); /* FINISHME: sampler arrays */
++    /* generate the mapping, remove when we generate storage at
++     * declaration time
++     */
++    sampler->accept(this);
++
++    tex_inst->sampler = get_sampler_number(sampler->var->location);
++
++    switch (sampler->type->sampler_dimensionality) {
++    case GLSL_SAMPLER_DIM_1D:
++       tex_inst->tex_target = TEXTURE_1D_INDEX;
++       break;
++    case GLSL_SAMPLER_DIM_2D:
++       tex_inst->tex_target = TEXTURE_2D_INDEX;
++       break;
++    case GLSL_SAMPLER_DIM_3D:
++       tex_inst->tex_target = TEXTURE_3D_INDEX;
++       break;
++    case GLSL_SAMPLER_DIM_CUBE:
++       tex_inst->tex_target = TEXTURE_CUBE_INDEX;
++       break;
++    default:
++       assert(!"FINISHME: other texture targets");
++    }
++
++    this->result = result_src;
++ }
++
++ /**
++  * Translate a return statement.
++  *
++  * A returned value, if any, is copied register by register into the
++  * current function's return register; a RET instruction follows in either
++  * case.  Must only be reached while a function is being translated.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_return *ir)
++ {
++    assert(current_function);
++
++    if (ir->get_value() != NULL) {
++       ir->get_value()->accept(this);
++       ir_to_mesa_src_reg value = this->result;
++
++       ir_to_mesa_dst_reg ret_dst =
++          ir_to_mesa_dst_reg_from_src(current_function->return_reg);
++
++       for (int reg = 0;
++            reg < type_size(current_function->sig->return_type);
++            reg++) {
++          ir_to_mesa_emit_op1(ir, OPCODE_MOV, ret_dst, value);
++          ret_dst.index++;
++          value.index++;
++       }
++    }
++
++    ir_to_mesa_emit_op0(ir, OPCODE_RET);
++ }
++
++ /**
++  * Translate an unconditional discard into a KIL_NV instruction.
++  *
++  * Conditional discards are not handled yet and trip the assertion.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_discard *ir)
++ {
++    /* FINISHME: conditional discard */
++    assert(ir->condition == NULL);
++
++    ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
++ }
++
++ /**
++  * Translate an if statement into IF / ELSE / ENDIF.
++  *
++  * The condition is evaluated first.  With ctx->Shader.EmitCondCodes set,
++  * the last instruction that computed the condition (or a MOV to a temp if
++  * evaluating it emitted nothing) is flagged to update the condition codes
++  * and the IF tests them with COND_NE.  Otherwise the IF takes the condition
++  * register as its source operand.
++  *
++  * The then-block follows, then an ELSE and the else-block if the latter is
++  * not empty, and finally an ENDIF.
++  */
++ void
++ ir_to_mesa_visitor::visit(ir_if *ir)
++ {
++    ir_to_mesa_instruction *cond_inst, *if_inst;
++    ir_to_mesa_instruction *prev_inst;
++
++    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
++
++    ir->condition->accept(this);
++    assert(this->result.file != PROGRAM_UNDEFINED);
++
++    if (ctx->Shader.EmitCondCodes) {
++       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
++
++       /* See if we actually generated any instruction for generating
++        * the condition.  If not, then cook up a move to a temp so we
++        * have something to set cond_update on.
++        */
++       if (cond_inst == prev_inst) {
++          ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
++          cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
++                                          ir_to_mesa_dst_reg_from_src(temp),
++                                          result);
++       }
++       cond_inst->cond_update = GL_TRUE;
++
++       if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
++       if_inst->dst_reg.cond_mask = COND_NE;
++    } else {
++       ir_to_mesa_emit_op1(ir->condition,
++                           OPCODE_IF, ir_to_mesa_undef_dst,
++                           this->result);
++    }
++
++    /* The IF is not pushed onto the instruction list here.  The emit
++     * helpers are relied on to append what they emit (ELSE and ENDIF below
++     * are never pushed explicitly either), so pushing the IF again would
++     * re-link a node that is already in the list.
++     */
++
++    visit_exec_list(&ir->then_instructions, this);
++
++    if (!ir->else_instructions.is_empty()) {
++       ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
++       visit_exec_list(&ir->else_instructions, this);
++    }
++
++    ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
++                        ir_to_mesa_undef_dst, ir_to_mesa_undef);
++ }
++
++ /**
++  * Set up an empty visitor: no result yet, no sampler map, no function
++  * being translated.  Temporary numbering and signature ids both start
++  * at 1.
++  */
++ ir_to_mesa_visitor::ir_to_mesa_visitor()
++ {
++    this->result.file = PROGRAM_UNDEFINED;
++
++    /* Counters handed out to temporaries and function signatures. */
++    this->next_temp = 1;
++    this->next_signature_id = 1;
++
++    /* Nothing has been mapped or entered yet. */
++    this->sampler_map_size = 0;
++    this->sampler_map = NULL;
++    this->current_function = NULL;
++ }
++
++ /**
++  * Convert the visitor's source register description into Mesa's
++  * prog_src_register.
++  *
++  * File, index, swizzle and negate are copied over.  RelAddr is set when the
++  * register carries a relative address, and Abs is always cleared.  The
++  * index is asserted to be below (1 << INST_INDEX_BITS) - 1.
++  */
++ static struct prog_src_register
++ mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
++ {
++    assert(reg.index < (1 << INST_INDEX_BITS) - 1);
++
++    struct prog_src_register converted;
++
++    converted.File = reg.file;
++    converted.Index = reg.index;
++    converted.Swizzle = reg.swizzle;
++    converted.Negate = reg.negate;
++    converted.Abs = 0;
++    converted.RelAddr = (reg.reladdr != NULL);
++
++    return converted;
++ }
++
++ /**
++  * Fill in the BranchTarget field of flow-control instructions.
++  *
++  * A first pass counts IF and BGNLOOP instructions, to size the two
++  * bookkeeping stacks, and marks every BRK/CONT as unresolved (-1).
++  *
++  * The second pass resolves the targets:
++  *  - IF points at its ELSE (if any) or ENDIF, and ELSE points at ENDIF;
++  *  - BGNLOOP and ENDLOOP point at each other;
++  *  - each still-unresolved BRK/CONT inside a loop points at that loop's
++  *    ENDLOOP;
++  *  - CAL has the signature id in BranchTarget replaced by the `inst`
++  *    value of the matching function entry.
++  *
++  * \param v                  Visitor owning the function signature list.
++  * \param mesa_instructions  Instruction array to patch in place.
++  * \param num_instructions   Number of entries in \p mesa_instructions.
++  */
++ static void
++ set_branchtargets(ir_to_mesa_visitor *v,
++                   struct prog_instruction *mesa_instructions,
++                   int num_instructions)
++ {
++    int if_count = 0, loop_count = 0;
++    int *if_stack, *loop_stack;
++    int if_stack_pos = 0, loop_stack_pos = 0;
++    int i, j;
++
++    for (i = 0; i < num_instructions; i++) {
++       switch (mesa_instructions[i].Opcode) {
++       case OPCODE_IF:
++          if_count++;
++          break;
++       case OPCODE_BGNLOOP:
++          loop_count++;
++          break;
++       case OPCODE_BRK:
++       case OPCODE_CONT:
++          mesa_instructions[i].BranchTarget = -1;
++          break;
++       default:
++          break;
++       }
++    }
++
++    if_stack = (int *)calloc(if_count, sizeof(*if_stack));
++    loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
++
++    for (i = 0; i < num_instructions; i++) {
++       switch (mesa_instructions[i].Opcode) {
++       case OPCODE_IF:
++          if_stack[if_stack_pos] = i;
++          if_stack_pos++;
++          break;
++       case OPCODE_ELSE:
++          /* The IF jumps here; the ELSE takes its place on the stack so
++           * that the ENDIF resolves the ELSE instead.
++           */
++          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
++          if_stack[if_stack_pos - 1] = i;
++          break;
++       case OPCODE_ENDIF:
++          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
++          if_stack_pos--;
++          break;
++       case OPCODE_BGNLOOP:
++          loop_stack[loop_stack_pos] = i;
++          loop_stack_pos++;
++          break;
++       case OPCODE_ENDLOOP:
++          loop_stack_pos--;
++          /* Rewrite any breaks/conts at this nesting level (haven't
++           * already had a BranchTarget assigned) to point to the end
++           * of the loop.
++           */
++          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
++             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
++                 mesa_instructions[j].Opcode == OPCODE_CONT) {
++                if (mesa_instructions[j].BranchTarget == -1) {
++                   mesa_instructions[j].BranchTarget = i;
++                }
++             }
++          }
++          /* The loop ends point at each other. */
++          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
++          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
++          break;
++       case OPCODE_CAL:
++          foreach_iter(exec_list_iterator, iter, v->function_signatures) {
++             function_entry *entry = (function_entry *)iter.get();
++
++             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
++                mesa_instructions[i].BranchTarget = entry->inst;
++                break;
++             }
++          }
++          break;
++       default:
++          break;
++       }
++    }
++
++    /* Both stacks were allocated above; release both. */
++    free(if_stack);
++    free(loop_stack);
++ }
++
++/**
++ * Dump a Mesa instruction array to stdout, one numbered instruction per
++ * line.  Whenever the annotating IR instruction is non-NULL and differs
++ * from the previously printed one, the IR is printed first, padded by the
++ * current indent level.
++ *
++ * \param mesa_instructions            instructions to print
++ * \param mesa_instruction_annotation  per-instruction IR annotation
++ * \param num_instructions             number of entries in both arrays
++ */
++static void
++print_program(struct prog_instruction *mesa_instructions,
++              ir_instruction **mesa_instruction_annotation,
++              int num_instructions)
++{
++   ir_instruction *prev_ir = NULL;
++   int depth = 0;
++
++   for (int n = 0; n < num_instructions; n++) {
++      ir_instruction *const cur_ir = mesa_instruction_annotation[n];
++
++      fprintf(stdout, "%3d: ", n);
++
++      if (cur_ir && cur_ir != prev_ir) {
++         for (int pad = depth; pad > 0; pad--)
++            fprintf(stdout, " ");
++
++         cur_ir->print();
++         printf("\n");
++         prev_ir = cur_ir;
++
++         fprintf(stdout, " "); /* line number spacing. */
++      }
++
++      /* The printer hands back the indent to use for the next line. */
++      depth = _mesa_fprint_instruction_opt(stdout, &mesa_instructions[n],
++                                           depth, PROG_PRINT_DEBUG, NULL);
++   }
++}
++
++/**
++ * Record in prog->InputsRead that input \c index is read.
++ *
++ * With relative addressing, an index inside FRAG_ATTRIB_TEX0..TEX7 marks
++ * all eight texcoord inputs; any other index hits the FINISHME assertion.
++ */
++static void
++mark_input(struct gl_program *prog,
++           int index,
++           GLboolean reladdr)
++{
++   prog->InputsRead |= BITFIELD64_BIT(index);
++
++   if (!reladdr)
++      return;
++
++   if (index < FRAG_ATTRIB_TEX0 || index > FRAG_ATTRIB_TEX7) {
++      assert(!"FINISHME: Mark InputsRead for varying arrays");
++      return;
++   }
++
++   for (int unit = 0; unit < 8; unit++)
++      prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + unit);
++}
++
++/**
++ * Record in prog->OutputsWritten that output \c index is written.
++ *
++ * With relative addressing, an index inside VERT_RESULT_TEX0..TEX7 marks
++ * all eight texcoord outputs; any other index hits the FINISHME assertion.
++ *
++ * \param prog     program whose OutputsWritten mask is updated
++ * \param index    output register index
++ * \param reladdr  non-zero when the access is relatively addressed
++ */
++static void
++mark_output(struct gl_program *prog,
++            int index,
++            GLboolean reladdr)
++{
++   prog->OutputsWritten |= BITFIELD64_BIT(index);
++   int i;
++
++   if (reladdr) {
++      if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) {
++         for (i = 0; i < 8; i++) {
++            /* Outputs are numbered in the VERT_RESULT_* space (the range
++             * test above uses it too), not the FRAG_ATTRIB_* space.
++             */
++            prog->OutputsWritten |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
++         }
++      } else {
++         assert(!"FINISHME: Mark OutputsWritten for varying arrays");
++      }
++   }
++}
++
++/**
++ * Recompute InputsRead, OutputsWritten and SamplersUsed for \c prog by
++ * scanning its instructions, record sampler targets and shadow samplers
++ * for texture instructions, then refresh the textures-used state.
++ */
++static void
++count_resources(struct gl_program *prog)
++{
++   prog->SamplersUsed = 0;
++   prog->OutputsWritten = 0;
++   prog->InputsRead = 0;
++
++   for (unsigned int n = 0; n < prog->NumInstructions; n++) {
++      struct prog_instruction *inst = prog->Instructions + n;
++
++      /* Destination register. */
++      if (inst->DstReg.File == PROGRAM_OUTPUT)
++         mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
++      else if (inst->DstReg.File == PROGRAM_INPUT)
++         mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
++
++      /* Source registers actually used by this opcode. */
++      for (unsigned int s = 0; s < _mesa_num_inst_src_regs(inst->Opcode); s++) {
++         const struct prog_src_register *src = &inst->SrcReg[s];
++
++         if (src->File == PROGRAM_OUTPUT)
++            mark_output(prog, src->Index, src->RelAddr);
++         else if (src->File == PROGRAM_INPUT)
++            mark_input(prog, src->Index, src->RelAddr);
++      }
++
++      /* Instead of just using the uniform's value to map to a
++       * sampler, Mesa first allocates a separate number for the
++       * sampler (_mesa_add_sampler), then we reindex it down to a
++       * small integer (sampler_map[], SamplersUsed), then that gets
++       * mapped to the uniform's value, and we get an actual sampler.
++       */
++      if (!_mesa_is_tex_instruction(inst->Opcode))
++         continue;
++
++      prog->SamplerTargets[inst->TexSrcUnit] =
++         (gl_texture_index)inst->TexSrcTarget;
++      prog->SamplersUsed |= 1 << inst->TexSrcUnit;
++      if (inst->TexShadow)
++         prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
++   }
++
++   _mesa_update_shader_textures_used(prog);
++}
++
++/* Each stage keeps some uniforms in its own Parameters list.  The Uniforms
++ * list of the linked shader program refers to those entries by their
++ * position in each stage's Parameters list, so that their values can be
++ * updated when a uniform is set.
++ *
++ * Every PROGRAM_UNIFORM or PROGRAM_SAMPLER parameter of prog is appended
++ * to the shared list, and the Initialized flag is copied over when the
++ * append returns a uniform.
++ */
++static void
++link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
++                                     struct gl_program *prog)
++{
++   for (unsigned int idx = 0; idx < prog->Parameters->NumParameters; idx++) {
++      const struct gl_program_parameter *param =
++         &prog->Parameters->Parameters[idx];
++
++      if (param->Type != PROGRAM_UNIFORM && param->Type != PROGRAM_SAMPLER)
++         continue;
++
++      struct gl_uniform *shared =
++         _mesa_append_uniform(uniforms, param->Name, prog->Target, idx);
++      if (shared != NULL)
++         shared->Initialized = param->Initialized;
++   }
++}
++
++/**
++ * Translate the IR of one linked shader into a Mesa gl_program.
++ *
++ * The IR for main() is emitted first, followed by the body of every
++ * function signature that was referenced, repeating until no new
++ * signatures show up.  The visitor's instruction list is then copied into
++ * a prog_instruction array, branch targets are resolved, and the program
++ * is attached to \c shader and optionally optimized.
++ *
++ * \param ctx             GL context (driver hooks and shader flags)
++ * \param shader_program  program being linked; receives InfoLog/LinkStatus
++ *                        updates when an IF survives with EmitNoIfs set
++ * \param shader          linked shader whose IR is translated
++ * \return the new program, or NULL if the shader type is not handled or
++ *         the driver could not create a program object
++ */
++struct gl_program *
++get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
++                 struct gl_shader *shader)
++{
++   void *mem_ctx = shader_program;
++   ir_to_mesa_visitor v;
++   struct prog_instruction *mesa_instructions, *mesa_inst;
++   ir_instruction **mesa_instruction_annotation;
++   int i;
++   struct gl_program *prog;
++   GLenum target;
++   const char *target_string;
++   GLboolean progress;
++
++   switch (shader->Type) {
++   case GL_VERTEX_SHADER:
++      target = GL_VERTEX_PROGRAM_ARB;
++      target_string = "vertex";
++      break;
++   case GL_FRAGMENT_SHADER:
++      target = GL_FRAGMENT_PROGRAM_ARB;
++      target_string = "fragment";
++      break;
++   default:
++      assert(!"should not be reached");
++      /* With assertions compiled out, do not continue with target and
++       * target_string uninitialized.
++       */
++      return NULL;
++   }
++
++   validate_ir_tree(shader->ir);
++
++   prog = ctx->Driver.NewProgram(ctx, target, 1);
++   if (!prog)
++      return NULL;
++   prog->Parameters = _mesa_new_parameter_list();
++   prog->Varying = _mesa_new_parameter_list();
++   prog->Attributes = _mesa_new_parameter_list();
++   v.ctx = ctx;
++   v.prog = prog;
++
++   /* NOTE(review): this context is not released in this function;
++    * confirm that the visitor (or its destructor) frees it.
++    */
++   v.mem_ctx = talloc_new(NULL);
++
++   /* Emit Mesa IR for main(). */
++   visit_exec_list(shader->ir, &v);
++   v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
++
++   /* Now emit bodies for any functions that were used. */
++   do {
++      progress = GL_FALSE;
++
++      foreach_iter(exec_list_iterator, iter, v.function_signatures) {
++         function_entry *entry = (function_entry *)iter.get();
++
++         /* A NULL bgn_inst means the body has not been emitted yet. */
++         if (!entry->bgn_inst) {
++            v.current_function = entry;
++
++            entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
++            entry->bgn_inst->function = entry;
++
++            visit_exec_list(&entry->sig->body, &v);
++
++            /* Close the subroutine.  bgn_inst keeps pointing at the
++             * BGNSUB instruction instead of being overwritten by these.
++             */
++            v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
++            v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
++            progress = GL_TRUE;
++         }
++      }
++   } while (progress);
++
++   prog->NumTemporaries = v.next_temp;
++
++   int num_instructions = 0;
++   foreach_iter(exec_list_iterator, iter, v.instructions) {
++      num_instructions++;
++   }
++
++   mesa_instructions =
++      (struct prog_instruction *)calloc(num_instructions,
++                                        sizeof(*mesa_instructions));
++   mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
++                                              num_instructions);
++
++   /* Copy the visitor's instruction list into the flat Mesa array. */
++   mesa_inst = mesa_instructions;
++   i = 0;
++   foreach_iter(exec_list_iterator, iter, v.instructions) {
++      ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
++
++      mesa_inst->Opcode = inst->op;
++      mesa_inst->CondUpdate = inst->cond_update;
++      mesa_inst->DstReg.File = inst->dst_reg.file;
++      mesa_inst->DstReg.Index = inst->dst_reg.index;
++      mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
++      mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
++      mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
++      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
++      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
++      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
++      mesa_inst->TexSrcUnit = inst->sampler;
++      mesa_inst->TexSrcTarget = inst->tex_target;
++      mesa_inst->TexShadow = inst->tex_shadow;
++      mesa_instruction_annotation[i] = inst->ir;
++
++      if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
++         shader_program->InfoLog =
++            talloc_asprintf_append(shader_program->InfoLog,
++                                   "Couldn't flatten if statement\n");
++         shader_program->LinkStatus = false;
++      }
++
++      if (mesa_inst->Opcode == OPCODE_BGNSUB)
++         inst->function->inst = i;
++      else if (mesa_inst->Opcode == OPCODE_CAL)
++         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
++      else if (mesa_inst->Opcode == OPCODE_ARL)
++         prog->NumAddressRegs = 1;
++
++      mesa_inst++;
++      i++;
++   }
++
++   set_branchtargets(&v, mesa_instructions, num_instructions);
++   if (ctx->Shader.Flags & GLSL_DUMP) {
++      printf("Mesa %s program:\n", target_string);
++      print_program(mesa_instructions, mesa_instruction_annotation,
++                    num_instructions);
++   }
++
++   /* The annotations are only needed for the dump above; free them now
++    * rather than leaving them attached to the shader program.
++    */
++   talloc_free(mesa_instruction_annotation);
++
++   prog->Instructions = mesa_instructions;
++   prog->NumInstructions = num_instructions;
++
++   _mesa_reference_program(ctx, &shader->Program, prog);
++
++   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
++      _mesa_optimize_program(ctx, prog);
++   }
++
++   return prog;
++}
++
++extern "C" {
++
++/**
++ * Compile the source of \c shader into IR.
++ *
++ * The source is preprocessed, lexed and parsed, then converted from AST to
++ * HIR.  When that produced IR without errors, the lowering passes run once
++ * and the optimization passes repeat until none of them reports progress.
++ * Compile status, info log, version, symbols and the builtins-to-link list
++ * are copied onto the shader, the IR is reparented to the shader and the
++ * parse state is freed.
++ */
++void
++_mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
++{
++   struct _mesa_glsl_parse_state *state =
++      new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
++   const char *source = shader->Source;
++
++   state->error = preprocess(state, &source, &state->info_log,
++                             &ctx->Extensions);
++
++   if (!state->error) {
++      _mesa_glsl_lexer_ctor(state, source);
++      _mesa_glsl_parse(state);
++      _mesa_glsl_lexer_dtor(state);
++   }
++
++   shader->ir = new(shader) exec_list;
++   if (!state->error && !state->translation_unit.is_empty())
++      _mesa_ast_to_hir(shader->ir, state);
++
++   if (!state->error && !shader->ir->is_empty()) {
++      validate_ir_tree(shader->ir);
++
++      /* Lowering */
++      do_mat_op_to_vec(shader->ir);
++      do_mod_to_fract(shader->ir);
++      do_div_to_mul_rcp(shader->ir);
++
++      /* Optimization passes: every pass runs on each round, and another
++       * round follows as long as any pass changed something.
++       */
++      bool changed;
++      do {
++         changed = false;
++
++         if (do_function_inlining(shader->ir))
++            changed = true;
++         if (do_if_simplification(shader->ir))
++            changed = true;
++         if (do_copy_propagation(shader->ir))
++            changed = true;
++         if (do_dead_code_local(shader->ir))
++            changed = true;
++         if (do_dead_code_unlinked(state, shader->ir))
++            changed = true;
++         if (do_constant_variable_unlinked(shader->ir))
++            changed = true;
++         if (do_constant_folding(shader->ir))
++            changed = true;
++         if (do_if_return(shader->ir))
++            changed = true;
++         if (ctx->Shader.EmitNoIfs) {
++            if (do_if_to_cond_assign(shader->ir))
++               changed = true;
++         }
++
++         if (do_vec_index_to_swizzle(shader->ir))
++            changed = true;
++         /* Do this one after the previous to let the easier pass handle
++          * constant vector indexing.
++          */
++         if (do_vec_index_to_cond_assign(shader->ir))
++            changed = true;
++
++         if (do_swizzle_swizzle(shader->ir))
++            changed = true;
++      } while (changed);
++
++      validate_ir_tree(shader->ir);
++   }
++
++   /* Publish the results on the shader object. */
++   shader->CompileStatus = !state->error;
++   shader->InfoLog = state->info_log;
++   shader->Version = state->language_version;
++   shader->symbols = state->symbols;
++   shader->num_builtins_to_link = state->num_builtins_to_link;
++   memcpy(shader->builtins_to_link, state->builtins_to_link,
++          sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
++
++   /* Retain any live IR, but trash the rest. */
++   reparent_ir(shader->ir, shader);
++
++   talloc_free(state);
++}
++
++/**
++ * Link the shaders attached to \c prog and generate a Mesa program for
++ * each linked stage.
++ *
++ * Linking fails up front if any attached shader is uncompiled.  Otherwise
++ * link_shaders() runs, the uniform list is replaced with a fresh one, and
++ * for every linked shader a Mesa program is generated, its resources are
++ * counted, its uniforms are added to the shared list and the driver is
++ * notified.  If a Mesa program cannot be generated, the link is marked as
++ * failed and that stage is skipped.
++ *
++ * \param ctx   GL context
++ * \param prog  shader program to link; LinkStatus and InfoLog report the
++ *              outcome
++ */
++void
++_mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
++{
++   unsigned int i;
++
++   _mesa_clear_shader_program_data(ctx, prog);
++
++   prog->LinkStatus = GL_TRUE;
++
++   for (i = 0; i < prog->NumShaders; i++) {
++      if (!prog->Shaders[i]->CompileStatus) {
++         prog->InfoLog =
++            talloc_asprintf_append(prog->InfoLog,
++                                   "linking with uncompiled shader");
++         prog->LinkStatus = GL_FALSE;
++      }
++   }
++
++   prog->Varying = _mesa_new_parameter_list();
++   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
++   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
++
++   if (prog->LinkStatus) {
++      link_shaders(prog);
++
++      /* We don't use the linker's uniforms list, and cook up our own at
++       * generate time.
++       */
++      free(prog->Uniforms);
++      prog->Uniforms = _mesa_new_uniform_list();
++   }
++
++   if (prog->LinkStatus) {
++      for (i = 0; i < prog->_NumLinkedShaders; i++) {
++         struct gl_program *linked_prog;
++
++         linked_prog = get_mesa_program(ctx, prog,
++                                        prog->_LinkedShaders[i]);
++         if (!linked_prog) {
++            /* get_mesa_program() can return NULL; do not hand that to
++             * count_resources(), which dereferences it.
++             */
++            prog->InfoLog =
++               talloc_asprintf_append(prog->InfoLog,
++                                      "failed to generate Mesa program\n");
++            prog->LinkStatus = GL_FALSE;
++            continue;
++         }
++
++         count_resources(linked_prog);
++
++         link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
++
++         switch (prog->_LinkedShaders[i]->Type) {
++         case GL_VERTEX_SHADER:
++            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
++                                     (struct gl_vertex_program *)linked_prog);
++            ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
++                                            linked_prog);
++            break;
++         case GL_FRAGMENT_SHADER:
++            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
++                                     (struct gl_fragment_program *)linked_prog);
++            ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
++                                            linked_prog);
++            break;
++         }
++      }
++   }
++}
++
++} /* extern "C" */
--- /dev/null
--- /dev/null
++/*
++ * Copyright © 2010 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++ * DEALINGS IN THE SOFTWARE.
++ */
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#include "main/config.h"
++#include "main/mtypes.h"
++
++void _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *sh);
++void _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog);
++
++#ifdef __cplusplus
++}
++#endif
--- /dev/null
- _mesa_problem(ctx, "Infinite loop detected in fragment program");
+ /*
+ * Mesa 3-D graphics library
+ * Version: 7.3
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file prog_execute.c
+ * Software interpreter for vertex/fragment programs.
+ * \author Brian Paul
+ */
+
+ /*
+ * NOTE: we do everything in single-precision floating point; we don't
+ * currently observe the single/half/fixed-precision qualifiers.
+ *
+ */
+
+
+ #include "main/glheader.h"
+ #include "main/colormac.h"
+ #include "main/context.h"
+ #include "prog_execute.h"
+ #include "prog_instruction.h"
+ #include "prog_parameter.h"
+ #include "prog_print.h"
+ #include "prog_noise.h"
+
+
+ /* debug predicate */
+ #define DEBUG_PROG 0
+
+
+ /**
+ * Set x to positive or negative infinity.
+ *
+ * When USE_IEEE or _WIN32 is defined, the value is produced by storing the
+ * bit pattern 0x7F800000 (positive) or 0xFF800000 (negative) in the i member
+ * of an fi_type and assigning its f member to x. On VMS, +/-__MAXFLOAT is
+ * assigned instead; in every other configuration +/-HUGE_VAL cast to GLfloat
+ * is assigned. Only the first variant is wrapped in do { } while (0); the
+ * other two expand to a bare assignment.
+ *
+ * SET_FLOAT_BITS(x, bits), defined right after this group, stores bits into
+ * the i member of x reinterpreted as an fi_type. Its expansion is an
+ * unparenthesized assignment, so it should only be used as a statement.
+ */
+ #if defined(USE_IEEE) || defined(_WIN32)
+ #define SET_POS_INFINITY(x) \
+ do { \
+ fi_type fi; \
+ fi.i = 0x7F800000; \
+ x = fi.f; \
+ } while (0)
+ #define SET_NEG_INFINITY(x) \
+ do { \
+ fi_type fi; \
+ fi.i = 0xFF800000; \
+ x = fi.f; \
+ } while (0)
+ #elif defined(VMS)
+ #define SET_POS_INFINITY(x) x = __MAXFLOAT
+ #define SET_NEG_INFINITY(x) x = -__MAXFLOAT
+ #else
+ #define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
+ #define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
+ #endif
+
+ #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
+
+
+ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+
+
+
+ /**
+  * Look up the 4-element float vector that a source register refers to.
+  *
+  * With relative addressing, the first address register component is added
+  * to the register index.  A negative relative index, or an index at or
+  * beyond the limit checked for the register file, yields ZeroVec.  An
+  * unknown register file is reported through _mesa_problem() and yields NULL.
+  */
+ static INLINE const GLfloat *
+ get_src_register_pointer(const struct prog_src_register *source,
+                          const struct gl_program_machine *machine)
+ {
+    const struct gl_program *prog = machine->CurProgram;
+    GLint idx = source->Index;
+
+    if (source->RelAddr) {
+       /* offset the index by the address register value */
+       idx += machine->AddressReg[0][0];
+       if (idx < 0)
+          return ZeroVec;
+    }
+
+    switch (source->File) {
+    case PROGRAM_TEMPORARY:
+       if (idx < MAX_PROGRAM_TEMPS)
+          return machine->Temporaries[idx];
+       return ZeroVec;
+
+    case PROGRAM_INPUT:
+       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+          if (idx < VERT_ATTRIB_MAX)
+             return machine->VertAttribs[idx];
+          return ZeroVec;
+       }
+       if (idx < FRAG_ATTRIB_MAX)
+          return machine->Attribs[idx][machine->CurElement];
+       return ZeroVec;
+
+    case PROGRAM_OUTPUT:
+       if (idx < MAX_PROGRAM_OUTPUTS)
+          return machine->Outputs[idx];
+       return ZeroVec;
+
+    case PROGRAM_LOCAL_PARAM:
+       if (idx < MAX_PROGRAM_LOCAL_PARAMS)
+          return machine->CurProgram->LocalParams[idx];
+       return ZeroVec;
+
+    case PROGRAM_ENV_PARAM:
+       if (idx < MAX_PROGRAM_ENV_PARAMS)
+          return machine->EnvParams[idx];
+       return ZeroVec;
+
+    /* All four of these files live in the program's parameter list. */
+    case PROGRAM_STATE_VAR:
+    case PROGRAM_CONSTANT:
+    case PROGRAM_UNIFORM:
+    case PROGRAM_NAMED_PARAM:
+       if (idx < (GLint) prog->Parameters->NumParameters)
+          return prog->Parameters->ParameterValues[idx];
+       return ZeroVec;
+
+    default:
+       _mesa_problem(NULL,
+                     "Invalid src register file %d in get_src_register_pointer()",
+                     source->File);
+       return NULL;
+    }
+ }
+
+
+ /**
+  * Look up the 4-element float vector that a destination register refers to.
+  *
+  * Writes that cannot land anywhere useful (negative relative index, index
+  * at or beyond the checked limit, PROGRAM_WRITE_ONLY) are directed to a
+  * static dummy register.  An unknown register file is reported through
+  * _mesa_problem() and yields NULL.
+  */
+ static INLINE GLfloat *
+ get_dst_register_pointer(const struct prog_dst_register *dest,
+                          struct gl_program_machine *machine)
+ {
+    static GLfloat dummyReg[4];
+    GLint idx = dest->Index;
+
+    if (dest->RelAddr) {
+       /* offset the destination index by the address register value */
+       idx += machine->AddressReg[0][0];
+       if (idx < 0)
+          return dummyReg;
+    }
+
+    switch (dest->File) {
+    case PROGRAM_TEMPORARY:
+       if (idx < MAX_PROGRAM_TEMPS)
+          return machine->Temporaries[idx];
+       return dummyReg;
+
+    case PROGRAM_OUTPUT:
+       if (idx < MAX_PROGRAM_OUTPUTS)
+          return machine->Outputs[idx];
+       return dummyReg;
+
+    case PROGRAM_WRITE_ONLY:
+       return dummyReg;
+
+    default:
+       _mesa_problem(NULL,
+                     "Invalid dest register file %d in get_dst_register_pointer()",
+                     dest->File);
+       return NULL;
+    }
+ }
+
+
+
+ /**
+  * Fetch a 4-element float vector from the given source register.
+  * The swizzle is applied first, then absolute value, then negation.
+  *
+  * \param source   source register to read
+  * \param machine  machine state the register is read from
+  * \param result   receives the four fetched components
+  */
+ static void
+ fetch_vector4(const struct prog_src_register *source,
+               const struct gl_program_machine *machine, GLfloat result[4])
+ {
+    const GLfloat *src = get_src_register_pointer(source, machine);
+    ASSERT(src);
+
+    if (source->Swizzle == SWIZZLE_NOOP) {
+       /* no swizzling */
+       COPY_4V(result, src);
+    }
+    else {
+       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
+       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
+       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
+       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
+       result[0] = src[GET_SWZ(source->Swizzle, 0)];
+       result[1] = src[GET_SWZ(source->Swizzle, 1)];
+       result[2] = src[GET_SWZ(source->Swizzle, 2)];
+       result[3] = src[GET_SWZ(source->Swizzle, 3)];
+    }
+
+    if (source->Abs) {
+       result[0] = FABSF(result[0]);
+       result[1] = FABSF(result[1]);
+       result[2] = FABSF(result[2]);
+       result[3] = FABSF(result[3]);
+    }
+    if (source->Negate) {
+       /* Only whole-vector negation is expected here. */
+       ASSERT(source->Negate == NEGATE_XYZW);
+       result[0] = -result[0];
+       result[1] = -result[1];
+       result[2] = -result[2];
+       result[3] = -result[3];
+    }
+
+ #ifdef NAN_CHECK
+    /* Check every component, not just the first one four times. */
+    assert(!IS_INF_OR_NAN(result[0]));
+    assert(!IS_INF_OR_NAN(result[1]));
+    assert(!IS_INF_OR_NAN(result[2]));
+    assert(!IS_INF_OR_NAN(result[3]));
+ #endif
+ }
+
+
+ /**
+  * Fetch a 4-element unsigned integer vector from the given source register.
+  * Only the swizzle is honoured; Negate and Abs are ignored.
+  */
+ static void
+ fetch_vector4ui(const struct prog_src_register *source,
+                 const struct gl_program_machine *machine, GLuint result[4])
+ {
+    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
+    GLuint c;
+
+    ASSERT(src);
+
+    if (source->Swizzle == SWIZZLE_NOOP) {
+       /* identity swizzle: straight copy */
+       COPY_4V(result, src);
+       return;
+    }
+
+    /* Validate all four selectors before touching the result. */
+    for (c = 0; c < 4; c++) {
+       ASSERT(GET_SWZ(source->Swizzle, c) <= 3);
+    }
+    for (c = 0; c < 4; c++) {
+       result[c] = src[GET_SWZ(source->Swizzle, c)];
+    }
+
+    /* Note: no Negate or Abs here */
+ }
+
+
+
+ /**
+  * Fetch the derivative with respect to X or Y for the given register.
+  * XXX this currently only works for fragment program input attribs.
+  *
+  * The stored derivative is scaled by 1/w of the current element's window
+  * position, then swizzled, then Abs and Negate are applied.  For any
+  * register other than a PROGRAM_INPUT with an index below NumDeriv the
+  * result is all zeros.  \c xOrY selects DerivX when it is 'X' and DerivY
+  * otherwise.
+  */
+ static void
+ fetch_vector4_deriv(GLcontext * ctx,
+                     const struct prog_src_register *source,
+                     const struct gl_program_machine *machine,
+                     char xOrY, GLfloat result[4])
+ {
+    const GLfloat *stored;
+    GLfloat scaled[4];
+    GLfloat invQ;
+    GLint col;
+    GLuint c;
+
+    if (source->File != PROGRAM_INPUT ||
+        source->Index >= (GLint) machine->NumDeriv) {
+       /* no derivative available for this register */
+       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
+       return;
+    }
+
+    col = machine->CurElement;
+    invQ = 1.0f / machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
+
+    if (xOrY == 'X')
+       stored = machine->DerivX[source->Index];
+    else
+       stored = machine->DerivY[source->Index];
+
+    for (c = 0; c < 4; c++)
+       scaled[c] = stored[c] * invQ;
+
+    for (c = 0; c < 4; c++)
+       result[c] = scaled[GET_SWZ(source->Swizzle, c)];
+
+    if (source->Abs) {
+       for (c = 0; c < 4; c++)
+          result[c] = FABSF(result[c]);
+    }
+    if (source->Negate) {
+       ASSERT(source->Negate == NEGATE_XYZW);
+       for (c = 0; c < 4; c++)
+          result[c] = -result[c];
+    }
+ }
+
+
+ /**
+  * Fetch a single float from the given source register into result[0].
+  * The component is picked by the first swizzle selector, then Abs and
+  * Negate are applied; result[1..3] are left untouched.
+  */
+ static void
+ fetch_vector1(const struct prog_src_register *source,
+               const struct gl_program_machine *machine, GLfloat result[4])
+ {
+    const GLfloat *src = get_src_register_pointer(source, machine);
+    GLfloat x;
+
+    ASSERT(src);
+
+    x = src[GET_SWZ(source->Swizzle, 0)];
+    if (source->Abs)
+       x = FABSF(x);
+    if (source->Negate)
+       x = -x;
+
+    result[0] = x;
+ }
+
+
+ /**
+  * Fetch a single unsigned integer from the given source register, using
+  * the first swizzle selector to pick the component.
+  */
+ static GLuint
+ fetch_vector1ui(const struct prog_src_register *source,
+                 const struct gl_program_machine *machine)
+ {
+    const GLuint *regValues =
+       (GLuint *) get_src_register_pointer(source, machine);
+    const GLuint component = GET_SWZ(source->Swizzle, 0);
+
+    return regValues[component];
+ }
+
+
+ /**
+  * Fetch a texel for a texture instruction.
+  *
+  * The derivative-based fetch is used only when derivatives exist and the
+  * texcoord comes directly from the fragment input FRAG_ATTRIB_TEX0 +
+  * TexSrcUnit; every other case uses the LOD-based fetch.
+  */
+ static INLINE void
+ fetch_texel(GLcontext *ctx,
+             const struct gl_program_machine *machine,
+             const struct prog_instruction *inst,
+             const GLfloat texcoord[4], GLfloat lodBias,
+             GLfloat color[4])
+ {
+    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
+
+    /* Note: we only have the right derivatives for fragment input attribs.
+     */
+    const GLboolean haveDerivs =
+       machine->NumDeriv > 0 &&
+       inst->SrcReg[0].File == PROGRAM_INPUT &&
+       inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit;
+
+    if (!haveDerivs) {
+       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
+       return;
+    }
+
+    /* simple texture fetch for which we should have derivatives */
+    {
+       const GLuint attr = inst->SrcReg[0].Index;
+       machine->FetchTexelDeriv(ctx, texcoord,
+                                machine->DerivX[attr],
+                                machine->DerivY[attr],
+                                lodBias, unit, color);
+    }
+ }
+
+
+ /**
+  * Classify a value relative to zero: COND_GT, COND_LT or COND_EQ, or
+  * COND_UN when the value is NaN.
+  */
+ static INLINE GLuint
+ generate_cc(float value)
+ {
+    if (value > 0.0F)
+       return COND_GT;
+    if (value < 0.0F)
+       return COND_LT;
+    /* Neither above nor below zero: zero unless it fails to equal itself,
+     * which only a NaN does.
+     */
+    return (value == value) ? COND_EQ : COND_UN;
+ }
+
+
+ /**
+  * Decide whether a condition code satisfies a condition-mask rule.
+  * Used to mask destination writes according to the current condition code.
+  * Rules not listed below are treated as always true.
+  */
+ static INLINE GLboolean
+ test_cc(GLuint condCode, GLuint ccMaskRule)
+ {
+    const GLboolean isEQ = (condCode == COND_EQ);
+    const GLboolean isLT = (condCode == COND_LT);
+    const GLboolean isGT = (condCode == COND_GT);
+
+    switch (ccMaskRule) {
+    case COND_FL:
+       return GL_FALSE;
+    case COND_GT:
+       return isGT;
+    case COND_LT:
+       return isLT;
+    case COND_EQ:
+       return isEQ;
+    case COND_NE:
+       return !isEQ;
+    case COND_GE:
+       return (isGT || isEQ);
+    case COND_LE:
+       return (isLT || isEQ);
+    case COND_TR:
+    default:
+       return GL_TRUE;
+    }
+ }
+
+
+ /**
+  * Evaluate the instruction's predicate: GL_TRUE if any of the four
+  * swizzled condition codes satisfies the instruction's condition mask,
+  * GL_FALSE otherwise.
+  */
+ static INLINE GLboolean
+ eval_condition(const struct gl_program_machine *machine,
+                const struct prog_instruction *inst)
+ {
+    const GLuint swizzle = inst->DstReg.CondSwizzle;
+    const GLuint condMask = inst->DstReg.CondMask;
+    GLuint c;
+
+    /* Stop at the first component that passes. */
+    for (c = 0; c < 4; c++) {
+       if (test_cc(machine->CondCodes[GET_SWZ(swizzle, c)], condMask))
+          return GL_TRUE;
+    }
+    return GL_FALSE;
+ }
+
+
+
+ /**
+  * Store 4 floats into a register.  Observe the instruction's saturate and
+  * set-condition-code flags.
+  *
+  * The values are clamped to [0, 1] first when the instruction saturates.
+  * Components are then written according to the write mask, further reduced
+  * by the condition codes when the condition mask is not COND_TR.  When
+  * CondUpdate is set, the condition codes of the written components are
+  * regenerated from the stored values.
+  *
+  * \param inst     instruction whose DstReg describes the destination
+  * \param machine  machine state holding registers and condition codes
+  * \param value    the four values to store
+  */
+ static void
+ store_vector4(const struct prog_instruction *inst,
+               struct gl_program_machine *machine, const GLfloat value[4])
+ {
+    const struct prog_dst_register *dstReg = &(inst->DstReg);
+    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
+    GLuint writeMask = dstReg->WriteMask;
+    GLfloat clampedValue[4];
+    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
+
+    /* The lookup returns NULL for an invalid register file; catch that in
+     * debug builds, as the fetch functions do for their source pointer.
+     */
+    ASSERT(dst);
+
+ #if 0
+    if (value[0] > 1.0e10 ||
+        IS_INF_OR_NAN(value[0]) ||
+        IS_INF_OR_NAN(value[1]) ||
+        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
+       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
+ #endif
+
+    if (clamp) {
+       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
+       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
+       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
+       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
+       value = clampedValue;
+    }
+
+    if (dstReg->CondMask != COND_TR) {
+       /* condition codes may turn off some writes */
+       if (writeMask & WRITEMASK_X) {
+          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
+                       dstReg->CondMask))
+             writeMask &= ~WRITEMASK_X;
+       }
+       if (writeMask & WRITEMASK_Y) {
+          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
+                       dstReg->CondMask))
+             writeMask &= ~WRITEMASK_Y;
+       }
+       if (writeMask & WRITEMASK_Z) {
+          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
+                       dstReg->CondMask))
+             writeMask &= ~WRITEMASK_Z;
+       }
+       if (writeMask & WRITEMASK_W) {
+          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
+                       dstReg->CondMask))
+             writeMask &= ~WRITEMASK_W;
+       }
+    }
+
+ #ifdef NAN_CHECK
+    /* Check every component, not just the first one four times. */
+    assert(!IS_INF_OR_NAN(value[0]));
+    assert(!IS_INF_OR_NAN(value[1]));
+    assert(!IS_INF_OR_NAN(value[2]));
+    assert(!IS_INF_OR_NAN(value[3]));
+ #endif
+
+    if (writeMask & WRITEMASK_X)
+       dst[0] = value[0];
+    if (writeMask & WRITEMASK_Y)
+       dst[1] = value[1];
+    if (writeMask & WRITEMASK_Z)
+       dst[2] = value[2];
+    if (writeMask & WRITEMASK_W)
+       dst[3] = value[3];
+
+    if (inst->CondUpdate) {
+       /* Only components that were actually written get a new code. */
+       if (writeMask & WRITEMASK_X)
+          machine->CondCodes[0] = generate_cc(value[0]);
+       if (writeMask & WRITEMASK_Y)
+          machine->CondCodes[1] = generate_cc(value[1]);
+       if (writeMask & WRITEMASK_Z)
+          machine->CondCodes[2] = generate_cc(value[2]);
+       if (writeMask & WRITEMASK_W)
+          machine->CondCodes[3] = generate_cc(value[3]);
+ #if DEBUG_PROG
+       printf("CondCodes=(%s,%s,%s,%s) for:\n",
+              _mesa_condcode_string(machine->CondCodes[0]),
+              _mesa_condcode_string(machine->CondCodes[1]),
+              _mesa_condcode_string(machine->CondCodes[2]),
+              _mesa_condcode_string(machine->CondCodes[3]));
+ #endif
+    }
+ }
+
+
+ /**
+  * Store 4 unsigned integers into a register.  Components are written
+  * according to the write mask, reduced by the condition codes when the
+  * condition mask is not COND_TR.  When CondUpdate is set, the condition
+  * codes of the written components are regenerated from the values
+  * converted to float.
+  */
+ static void
+ store_vector4ui(const struct prog_instruction *inst,
+                 struct gl_program_machine *machine, const GLuint value[4])
+ {
+    /* write-mask bit for each component, in x, y, z, w order */
+    static const GLuint compBit[4] = {
+       WRITEMASK_X, WRITEMASK_Y, WRITEMASK_Z, WRITEMASK_W
+    };
+    const struct prog_dst_register *dstReg = &(inst->DstReg);
+    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
+    GLuint writeMask = dstReg->WriteMask;
+    GLuint c;
+
+    if (dstReg->CondMask != COND_TR) {
+       /* condition codes may turn off some writes */
+       for (c = 0; c < 4; c++) {
+          if ((writeMask & compBit[c]) &&
+              !test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, c)],
+                       dstReg->CondMask))
+             writeMask &= ~compBit[c];
+       }
+    }
+
+    for (c = 0; c < 4; c++) {
+       if (writeMask & compBit[c])
+          dst[c] = value[c];
+    }
+
+    if (inst->CondUpdate) {
+       for (c = 0; c < 4; c++) {
+          if (writeMask & compBit[c])
+             machine->CondCodes[c] = generate_cc((float)value[c]);
+       }
+ #if DEBUG_PROG
+       printf("CondCodes=(%s,%s,%s,%s) for:\n",
+              _mesa_condcode_string(machine->CondCodes[0]),
+              _mesa_condcode_string(machine->CondCodes[1]),
+              _mesa_condcode_string(machine->CondCodes[2]),
+              _mesa_condcode_string(machine->CondCodes[3]));
+ #endif
+    }
+ }
+
+
+
+ /**
+  * Execute the given vertex/fragment program.
+  *
+  * The program is interpreted one instruction at a time, starting at
+  * instruction 0.  Flow-control opcodes (loops, branches, IF/ELSE, CAL/RET)
+  * work by rewriting the loop counter \c pc; because the enclosing for-loop
+  * always does pc++ afterwards, several of them store "target - 1".
+  *
+  * Execution stops when OPCODE_END is reached, when the last instruction has
+  * been executed, when a KIL/KIL_NV instruction fires, on a bad opcode, on
+  * call-stack overflow/underflow, or after more than 10000 instructions have
+  * been executed (treated as an infinite loop).
+  *
+  * \param ctx  rendering context
+  * \param program  the program to execute
+  * \param machine  machine state (must be initialized)
+  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
+  */
+ GLboolean
+ _mesa_execute_program(GLcontext * ctx,
+                       const struct gl_program *program,
+                       struct gl_program_machine *machine)
+ {
+    const GLuint numInst = program->NumInstructions;
+    /* upper bound on executed instructions; guards against endless loops */
+    const GLuint maxExec = 10000;
+    GLuint pc, numExec = 0;
+
+    machine->CurProgram = program;
+
+    if (DEBUG_PROG) {
+       printf("execute program %u --------------------\n", program->Id);
+    }
+
+    /* Select the environment parameters matching the program target */
+    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+       machine->EnvParams = ctx->VertexProgram.Parameters;
+    }
+    else {
+       machine->EnvParams = ctx->FragmentProgram.Parameters;
+    }
+
+    for (pc = 0; pc < numInst; pc++) {
+       const struct prog_instruction *inst = program->Instructions + pc;
+
+       if (DEBUG_PROG) {
+          _mesa_print_instruction(inst);
+       }
+
+       switch (inst->Opcode) {
+       case OPCODE_ABS:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] = FABSF(a[0]);
+             result[1] = FABSF(a[1]);
+             result[2] = FABSF(a[2]);
+             result[3] = FABSF(a[3]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_ADD:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] + b[0];
+             result[1] = a[1] + b[1];
+             result[2] = a[2] + b[2];
+             result[3] = a[3] + b[3];
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_AND:     /* bitwise AND */
+          {
+             GLuint a[4], b[4], result[4];
+             fetch_vector4ui(&inst->SrcReg[0], machine, a);
+             fetch_vector4ui(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] & b[0];
+             result[1] = a[1] & b[1];
+             result[2] = a[2] & b[2];
+             result[3] = a[3] & b[3];
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_ARL:
+          {
+             GLfloat t[4];
+             fetch_vector4(&inst->SrcReg[0], machine, t);
+             machine->AddressReg[0][0] = IFLOOR(t[0]);
+             if (DEBUG_PROG) {
+                printf("ARL %d\n", machine->AddressReg[0][0]);
+             }
+          }
+          break;
+       case OPCODE_BGNLOOP:
+          /* no-op */
+          ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                 == OPCODE_ENDLOOP);
+          break;
+       case OPCODE_ENDLOOP:
+          /* subtract 1 here since pc is incremented by for(pc) loop */
+          ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                 == OPCODE_BGNLOOP);
+          pc = inst->BranchTarget - 1;   /* go to matching BGNLOOP */
+          break;
+       case OPCODE_BGNSUB:  /* begin subroutine */
+          break;
+       case OPCODE_ENDSUB:  /* end subroutine */
+          break;
+       case OPCODE_BRA:     /* branch (conditional) */
+          if (eval_condition(machine, inst)) {
+             /* take branch */
+             /* Subtract 1 here since we'll do pc++ below */
+             pc = inst->BranchTarget - 1;
+          }
+          break;
+       case OPCODE_BRK:     /* break out of loop (conditional) */
+          ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                 == OPCODE_ENDLOOP);
+          if (eval_condition(machine, inst)) {
+             /* break out of loop */
+             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
+             pc = inst->BranchTarget;
+          }
+          break;
+       case OPCODE_CONT:    /* continue loop (conditional) */
+          ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                 == OPCODE_ENDLOOP);
+          if (eval_condition(machine, inst)) {
+             /* continue at ENDLOOP */
+             /* Subtract 1 here since we'll do pc++ at end of for-loop */
+             pc = inst->BranchTarget - 1;
+          }
+          break;
+       case OPCODE_CAL:     /* Call subroutine (conditional) */
+          if (eval_condition(machine, inst)) {
+             /* call the subroutine */
+             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
+                return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
+             }
+             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
+             /* Subtract 1 here since we'll do pc++ at end of for-loop */
+             pc = inst->BranchTarget - 1;
+          }
+          break;
+       case OPCODE_CMP:
+          {
+             GLfloat a[4], b[4], c[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             fetch_vector4(&inst->SrcReg[2], machine, c);
+             result[0] = a[0] < 0.0F ? b[0] : c[0];
+             result[1] = a[1] < 0.0F ? b[1] : c[1];
+             result[2] = a[2] < 0.0F ? b[2] : c[2];
+             result[3] = a[3] < 0.0F ? b[3] : c[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_COS:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             result[0] = result[1] = result[2] = result[3]
+                = (GLfloat) cos(a[0]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_DDX:     /* Partial derivative with respect to X */
+          {
+             GLfloat result[4];
+             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                 'X', result);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_DDY:     /* Partial derivative with respect to Y */
+          {
+             GLfloat result[4];
+             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                 'Y', result);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_DP2:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("DP2 %g = (%g %g) . (%g %g)\n",
+                       result[0], a[0], a[1], b[0], b[1]);
+             }
+          }
+          break;
+       case OPCODE_DP2A:
+          {
+             GLfloat a[4], b[4], c, result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             /* the scalar addend is the third operand, not the second */
+             fetch_vector1(&inst->SrcReg[2], machine, &c);
+             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
+                       result[0], a[0], a[1], b[0], b[1], c);
+             }
+          }
+          break;
+       case OPCODE_DP3:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
+                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
+             }
+          }
+          break;
+       case OPCODE_DP4:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
+                       result[0], a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_DPH:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_DST:     /* Distance vector */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = 1.0F;
+             result[1] = a[1] * b[1];
+             result[2] = a[2];
+             result[3] = b[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_EXP:
+          {
+             GLfloat t[4], q[4], floor_t0;
+             fetch_vector1(&inst->SrcReg[0], machine, t);
+             floor_t0 = FLOORF(t[0]);
+             if (floor_t0 > FLT_MAX_EXP) {
+                SET_POS_INFINITY(q[0]);
+                SET_POS_INFINITY(q[2]);
+             }
+             else if (floor_t0 < FLT_MIN_EXP) {
+                q[0] = 0.0F;
+                q[2] = 0.0F;
+             }
+             else {
+                q[0] = LDEXPF(1.0, (int) floor_t0);
+                /* Note: GL_NV_vertex_program expects
+                 * result.z = result.x * APPX(result.y)
+                 * We do what the ARB extension says.
+                 */
+                q[2] = (GLfloat) pow(2.0, t[0]);
+             }
+             q[1] = t[0] - floor_t0;
+             q[3] = 1.0F;
+             store_vector4( inst, machine, q );
+          }
+          break;
+       case OPCODE_EX2:     /* Exponential base 2 */
+          {
+             GLfloat a[4], result[4], val;
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             val = (GLfloat) pow(2.0, a[0]);
+             /*
+             if (IS_INF_OR_NAN(val))
+                val = 1.0e10;
+             */
+             result[0] = result[1] = result[2] = result[3] = val;
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_FLR:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] = FLOORF(a[0]);
+             result[1] = FLOORF(a[1]);
+             result[2] = FLOORF(a[2]);
+             result[3] = FLOORF(a[3]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_FRC:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] = a[0] - FLOORF(a[0]);
+             result[1] = a[1] - FLOORF(a[1]);
+             result[2] = a[2] - FLOORF(a[2]);
+             result[3] = a[3] - FLOORF(a[3]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_IF:
+          {
+             GLboolean cond;
+             ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                    == OPCODE_ELSE ||
+                    program->Instructions[inst->BranchTarget].Opcode
+                    == OPCODE_ENDIF);
+             /* eval condition */
+             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
+                GLfloat a[4];
+                fetch_vector1(&inst->SrcReg[0], machine, a);
+                cond = (a[0] != 0.0);
+             }
+             else {
+                cond = eval_condition(machine, inst);
+             }
+             if (DEBUG_PROG) {
+                printf("IF: %d\n", cond);
+             }
+             /* do if/else */
+             if (cond) {
+                /* do if-clause (just continue execution) */
+             }
+             else {
+                /* go to the instruction after ELSE or ENDIF */
+                assert(inst->BranchTarget >= 0);
+                pc = inst->BranchTarget;
+             }
+          }
+          break;
+       case OPCODE_ELSE:
+          /* goto ENDIF */
+          ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                 == OPCODE_ENDIF);
+          assert(inst->BranchTarget >= 0);
+          pc = inst->BranchTarget;
+          break;
+       case OPCODE_ENDIF:
+          /* nothing */
+          break;
+       case OPCODE_KIL_NV:  /* NV_f_p only (conditional) */
+          if (eval_condition(machine, inst)) {
+             return GL_FALSE;
+          }
+          break;
+       case OPCODE_KIL:     /* ARB_f_p only */
+          {
+             GLfloat a[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             if (DEBUG_PROG) {
+                printf("KIL if (%g %g %g %g) <= 0.0\n",
+                       a[0], a[1], a[2], a[3]);
+             }
+
+             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
+                return GL_FALSE;
+             }
+          }
+          break;
+       case OPCODE_LG2:     /* log base 2 */
+          {
+             GLfloat a[4], result[4], val;
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             /* The fast LOG2 macro doesn't meet the precision requirements.
+              */
+             if (a[0] == 0.0F) {
+                val = -FLT_MAX;
+             }
+             else {
+                val = (float)(log(a[0]) * 1.442695F);
+             }
+             result[0] = result[1] = result[2] = result[3] = val;
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_LIT:
+          {
+             const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             a[0] = MAX2(a[0], 0.0F);
+             a[1] = MAX2(a[1], 0.0F);
+             /* XXX ARB version clamps a[3], NV version doesn't */
+             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
+             result[0] = 1.0F;
+             result[1] = a[0];
+             /* XXX we could probably just use pow() here */
+             if (a[0] > 0.0F) {
+                if (a[1] == 0.0 && a[3] == 0.0)
+                   result[2] = 1.0F;
+                else
+                   result[2] = (GLfloat) pow(a[1], a[3]);
+             }
+             else {
+                result[2] = 0.0F;
+             }
+             result[3] = 1.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3]);
+             }
+          }
+          break;
+       case OPCODE_LOG:
+          {
+             GLfloat t[4], q[4], abs_t0;
+             fetch_vector1(&inst->SrcReg[0], machine, t);
+             abs_t0 = FABSF(t[0]);
+             if (abs_t0 != 0.0F) {
+                /* Since we really can't handle infinite values on VMS
+                 * like other OSes we'll use __MAXFLOAT to represent
+                 * infinity.  This may need some tweaking.
+                 */
+ #ifdef VMS
+                if (abs_t0 == __MAXFLOAT)
+ #else
+                if (IS_INF_OR_NAN(abs_t0))
+ #endif
+                {
+                   SET_POS_INFINITY(q[0]);
+                   q[1] = 1.0F;
+                   SET_POS_INFINITY(q[2]);
+                }
+                else {
+                   int exponent;
+                   /* LOG is defined on |t.x|: use abs_t0 so that negative
+                    * inputs don't yield a negative mantissa / NaN log.
+                    */
+                   GLfloat mantissa = FREXPF(abs_t0, &exponent);
+                   q[0] = (GLfloat) (exponent - 1);
+                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
+
+                   /* The fast LOG2 macro doesn't meet the precision
+                    * requirements.
+                    */
+                   q[2] = (float)(log(abs_t0) * 1.442695F);
+                }
+             }
+             else {
+                SET_NEG_INFINITY(q[0]);
+                q[1] = 1.0F;
+                SET_NEG_INFINITY(q[2]);
+             }
+             q[3] = 1.0;
+             store_vector4(inst, machine, q);
+          }
+          break;
+       case OPCODE_LRP:
+          {
+             GLfloat a[4], b[4], c[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             fetch_vector4(&inst->SrcReg[2], machine, c);
+             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
+             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
+             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
+             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
+                       "(%g %g %g %g), (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
+             }
+          }
+          break;
+       case OPCODE_MAD:
+          {
+             GLfloat a[4], b[4], c[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             fetch_vector4(&inst->SrcReg[2], machine, c);
+             result[0] = a[0] * b[0] + c[0];
+             result[1] = a[1] * b[1] + c[1];
+             result[2] = a[2] * b[2] + c[2];
+             result[3] = a[3] * b[3] + c[3];
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
+                       "(%g %g %g %g) + (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
+             }
+          }
+          break;
+       case OPCODE_MAX:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = MAX2(a[0], b[0]);
+             result[1] = MAX2(a[1], b[1]);
+             result[2] = MAX2(a[2], b[2]);
+             result[3] = MAX2(a[3], b[3]);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_MIN:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = MIN2(a[0], b[0]);
+             result[1] = MIN2(a[1], b[1]);
+             result[2] = MIN2(a[2], b[2]);
+             result[3] = MIN2(a[3], b[3]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_MOV:
+          {
+             GLfloat result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, result);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("MOV (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3]);
+             }
+          }
+          break;
+       case OPCODE_MUL:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] * b[0];
+             result[1] = a[1] * b[1];
+             result[2] = a[2] * b[2];
+             result[3] = a[3] * b[3];
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_NOISE1:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = _mesa_noise1(a[0]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_NOISE2:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] =
+                result[1] =
+                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_NOISE3:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = _mesa_noise3(a[0], a[1], a[2]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_NOISE4:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_NOP:
+          break;
+       case OPCODE_NOT:     /* bitwise NOT */
+          {
+             GLuint a[4], result[4];
+             fetch_vector4ui(&inst->SrcReg[0], machine, a);
+             result[0] = ~a[0];
+             result[1] = ~a[1];
+             result[2] = ~a[2];
+             result[3] = ~a[3];
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_NRM3:    /* 3-component normalization */
+          {
+             GLfloat a[4], result[4];
+             GLfloat tmp;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
+             if (tmp != 0.0F)
+                tmp = INV_SQRTF(tmp);
+             result[0] = tmp * a[0];
+             result[1] = tmp * a[1];
+             result[2] = tmp * a[2];
+             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_NRM4:    /* 4-component normalization */
+          {
+             GLfloat a[4], result[4];
+             GLfloat tmp;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
+             if (tmp != 0.0F)
+                tmp = INV_SQRTF(tmp);
+             result[0] = tmp * a[0];
+             result[1] = tmp * a[1];
+             result[2] = tmp * a[2];
+             result[3] = tmp * a[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_OR:      /* bitwise OR */
+          {
+             GLuint a[4], b[4], result[4];
+             fetch_vector4ui(&inst->SrcReg[0], machine, a);
+             fetch_vector4ui(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] | b[0];
+             result[1] = a[1] | b[1];
+             result[2] = a[2] | b[2];
+             result[3] = a[3] | b[3];
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_PK2H:    /* pack two 16-bit floats in one 32-bit float */
+          {
+             GLfloat a[4];
+             GLuint result[4];
+             GLhalfNV hx, hy;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             hx = _mesa_float_to_half(a[0]);
+             hy = _mesa_float_to_half(a[1]);
+             /* widen to GLuint before shifting: a 16-bit value promoted to
+              * int would overflow when its top bit is shifted into bit 31.
+              */
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = (GLuint) hx | ((GLuint) hy << 16);
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_PK2US:   /* pack two GLushorts into one 32-bit float */
+          {
+             GLfloat a[4];
+             GLuint result[4], usx, usy;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             a[0] = CLAMP(a[0], 0.0F, 1.0F);
+             a[1] = CLAMP(a[1], 0.0F, 1.0F);
+             usx = IROUND(a[0] * 65535.0F);
+             usy = IROUND(a[1] * 65535.0F);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = usx | (usy << 16);
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_PK4B:    /* pack four GLbytes into one 32-bit float */
+          {
+             GLfloat a[4];
+             GLuint result[4], ubx, uby, ubz, ubw;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
+             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
+             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
+             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
+             ubx = IROUND(127.0F * a[0] + 128.0F);
+             uby = IROUND(127.0F * a[1] + 128.0F);
+             ubz = IROUND(127.0F * a[2] + 128.0F);
+             ubw = IROUND(127.0F * a[3] + 128.0F);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_PK4UB:   /* pack four GLubytes into one 32-bit float */
+          {
+             GLfloat a[4];
+             GLuint result[4], ubx, uby, ubz, ubw;
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             a[0] = CLAMP(a[0], 0.0F, 1.0F);
+             a[1] = CLAMP(a[1], 0.0F, 1.0F);
+             a[2] = CLAMP(a[2], 0.0F, 1.0F);
+             a[3] = CLAMP(a[3], 0.0F, 1.0F);
+             ubx = IROUND(255.0F * a[0]);
+             uby = IROUND(255.0F * a[1]);
+             ubz = IROUND(255.0F * a[2]);
+             ubw = IROUND(255.0F * a[3]);
+             result[0] =
+                result[1] =
+                result[2] =
+                result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_POW:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             fetch_vector1(&inst->SrcReg[1], machine, b);
+             result[0] = result[1] = result[2] = result[3]
+                = (GLfloat) pow(a[0], b[0]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_RCP:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             if (DEBUG_PROG) {
+                if (a[0] == 0)
+                   printf("RCP(0)\n");
+                else if (IS_INF_OR_NAN(a[0]))
+                   printf("RCP(inf)\n");
+             }
+             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_RET:     /* return from subroutine (conditional) */
+          if (eval_condition(machine, inst)) {
+             if (machine->StackDepth == 0) {
+                return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
+             }
+             /* subtract one because of pc++ in the for loop */
+             pc = machine->CallStack[--machine->StackDepth] - 1;
+          }
+          break;
+       case OPCODE_RFL:     /* reflection vector */
+          {
+             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
+             fetch_vector4(&inst->SrcReg[0], machine, axis);
+             fetch_vector4(&inst->SrcReg[1], machine, dir);
+             tmpW = DOT3(axis, axis);
+             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
+             result[0] = tmpX * axis[0] - dir[0];
+             result[1] = tmpX * axis[1] - dir[1];
+             result[2] = tmpX * axis[2] - dir[2];
+             /* result[3] is not defined by RFL (XXX enforce in parser!);
+              * give it a value so store_vector4 never reads garbage.
+              */
+             result[3] = 0.0F;
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_RSQ:     /* 1 / sqrt() */
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             a[0] = FABSF(a[0]);
+             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
+             }
+          }
+          break;
+       case OPCODE_SCS:     /* sine and cos */
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             result[0] = (GLfloat) cos(a[0]);
+             result[1] = (GLfloat) sin(a[0]);
+             result[2] = 0.0;  /* undefined! */
+             result[3] = 0.0;  /* undefined! */
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_SEQ:     /* set on equal */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SFL:     /* set false, operands ignored */
+          {
+             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_SGE:     /* set on greater or equal */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SGT:     /* set on greater */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SIN:
+          {
+             GLfloat a[4], result[4];
+             fetch_vector1(&inst->SrcReg[0], machine, a);
+             result[0] = result[1] = result[2] = result[3]
+                = (GLfloat) sin(a[0]);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_SLE:     /* set on less or equal */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SLT:     /* set on less */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SNE:     /* set on not equal */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
+             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
+             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
+             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3],
+                       b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SSG:     /* set sign (-1, 0 or +1) */
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
+             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
+             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
+             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_STR:     /* set true, operands ignored */
+          {
+             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_SUB:
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] - b[0];
+             result[1] = a[1] - b[1];
+             result[2] = a[2] - b[2];
+             result[3] = a[3] - b[3];
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+             }
+          }
+          break;
+       case OPCODE_SWZ:     /* extended swizzle */
+          {
+             const struct prog_src_register *source = &inst->SrcReg[0];
+             const GLfloat *src = get_src_register_pointer(source, machine);
+             GLfloat result[4];
+             GLuint i;
+             for (i = 0; i < 4; i++) {
+                const GLuint swz = GET_SWZ(source->Swizzle, i);
+                if (swz == SWIZZLE_ZERO)
+                   result[i] = 0.0;
+                else if (swz == SWIZZLE_ONE)
+                   result[i] = 1.0;
+                else {
+                   ASSERT(swz >= 0);
+                   ASSERT(swz <= 3);
+                   result[i] = src[swz];
+                }
+                if (source->Negate & (1 << i))
+                   result[i] = -result[i];
+             }
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_TEX:     /* Both ARB and NV frag prog */
+          /* Simple texel lookup */
+          {
+             GLfloat texcoord[4], color[4];
+             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+             if (DEBUG_PROG) {
+                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
+                       color[0], color[1], color[2], color[3],
+                       inst->TexSrcUnit,
+                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
+             }
+             store_vector4(inst, machine, color);
+          }
+          break;
+       case OPCODE_TXB:     /* GL_ARB_fragment_program only */
+          /* Texel lookup with LOD bias */
+          {
+             GLfloat texcoord[4], color[4], lodBias;
+
+             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+             /* texcoord[3] is the bias to add to lambda */
+             lodBias = texcoord[3];
+
+             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
+
+             store_vector4(inst, machine, color);
+          }
+          break;
+       case OPCODE_TXD:     /* GL_NV_fragment_program only */
+          /* Texture lookup w/ partial derivatives for LOD */
+          {
+             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
+             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
+             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
+             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
+                                      0.0, /* lodBias */
+                                      inst->TexSrcUnit, color);
+             store_vector4(inst, machine, color);
+          }
+          break;
+       case OPCODE_TXP:     /* GL_ARB_fragment_program only */
+          /* Texture lookup w/ projective divide */
+          {
+             GLfloat texcoord[4], color[4];
+
+             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+             /* Not so sure about this test - if texcoord[3] is
+              * zero, we'd probably be fine except for an ASSERT in
+              * IROUND_POS() which gets triggered by the inf values created.
+              */
+             if (texcoord[3] != 0.0) {
+                texcoord[0] /= texcoord[3];
+                texcoord[1] /= texcoord[3];
+                texcoord[2] /= texcoord[3];
+             }
+
+             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+             store_vector4(inst, machine, color);
+          }
+          break;
+       case OPCODE_TXP_NV:  /* GL_NV_fragment_program only */
+          /* Texture lookup w/ projective divide, as above, but do not
+           * do the divide by w if sampling from a cube map.
+           */
+          {
+             GLfloat texcoord[4], color[4];
+
+             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
+                 texcoord[3] != 0.0) {
+                texcoord[0] /= texcoord[3];
+                texcoord[1] /= texcoord[3];
+                texcoord[2] /= texcoord[3];
+             }
+
+             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+             store_vector4(inst, machine, color);
+          }
+          break;
+       case OPCODE_TRUNC:   /* truncate toward zero */
+          {
+             GLfloat a[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             result[0] = (GLfloat) (GLint) a[0];
+             result[1] = (GLfloat) (GLint) a[1];
+             result[2] = (GLfloat) (GLint) a[2];
+             result[3] = (GLfloat) (GLint) a[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_UP2H:    /* unpack two 16-bit floats */
+          {
+             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+             GLfloat result[4];
+             GLushort hx, hy;
+             hx = raw & 0xffff;
+             hy = raw >> 16;
+             result[0] = result[2] = _mesa_half_to_float(hx);
+             result[1] = result[3] = _mesa_half_to_float(hy);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_UP2US:   /* unpack two GLushorts */
+          {
+             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+             GLfloat result[4];
+             GLushort usx, usy;
+             usx = raw & 0xffff;
+             usy = raw >> 16;
+             result[0] = result[2] = usx * (1.0f / 65535.0f);
+             result[1] = result[3] = usy * (1.0f / 65535.0f);
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_UP4B:    /* unpack four GLbytes */
+          {
+             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+             GLfloat result[4];
+             /* Convert each byte to a signed int before subtracting the
+              * bias; in unsigned arithmetic bytes below 128 would wrap
+              * around to a huge positive value instead of going negative.
+              */
+             result[0] = ((GLint) ((raw >> 0) & 0xff) - 128) / 127.0F;
+             result[1] = ((GLint) ((raw >> 8) & 0xff) - 128) / 127.0F;
+             result[2] = ((GLint) ((raw >> 16) & 0xff) - 128) / 127.0F;
+             result[3] = ((GLint) ((raw >> 24) & 0xff) - 128) / 127.0F;
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_UP4UB:   /* unpack four GLubytes */
+          {
+             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+             GLfloat result[4];
+             result[0] = ((raw >> 0) & 0xff) / 255.0F;
+             result[1] = ((raw >> 8) & 0xff) / 255.0F;
+             result[2] = ((raw >> 16) & 0xff) / 255.0F;
+             result[3] = ((raw >> 24) & 0xff) / 255.0F;
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_XOR:     /* bitwise XOR */
+          {
+             GLuint a[4], b[4], result[4];
+             fetch_vector4ui(&inst->SrcReg[0], machine, a);
+             fetch_vector4ui(&inst->SrcReg[1], machine, b);
+             result[0] = a[0] ^ b[0];
+             result[1] = a[1] ^ b[1];
+             result[2] = a[2] ^ b[2];
+             result[3] = a[3] ^ b[3];
+             store_vector4ui(inst, machine, result);
+          }
+          break;
+       case OPCODE_XPD:     /* cross product */
+          {
+             GLfloat a[4], b[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             result[0] = a[1] * b[2] - a[2] * b[1];
+             result[1] = a[2] * b[0] - a[0] * b[2];
+             result[2] = a[0] * b[1] - a[1] * b[0];
+             result[3] = 1.0;
+             store_vector4(inst, machine, result);
+             if (DEBUG_PROG) {
+                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
+                       result[0], result[1], result[2], result[3],
+                       a[0], a[1], a[2], b[0], b[1], b[2]);
+             }
+          }
+          break;
+       case OPCODE_X2D:     /* 2-D matrix transform */
+          {
+             GLfloat a[4], b[4], c[4], result[4];
+             fetch_vector4(&inst->SrcReg[0], machine, a);
+             fetch_vector4(&inst->SrcReg[1], machine, b);
+             fetch_vector4(&inst->SrcReg[2], machine, c);
+             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
+             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
+             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
+             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
+             store_vector4(inst, machine, result);
+          }
+          break;
+       case OPCODE_PRINT:
+          {
+             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
+                GLfloat a[4];
+                fetch_vector4(&inst->SrcReg[0], machine, a);
+                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
+                       a[0], a[1], a[2], a[3]);
+             }
+             else {
+                printf("%s\n", (const char *) inst->Data);
+             }
+          }
+          break;
+       case OPCODE_END:
+          return GL_TRUE;
+       default:
+          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
+                        inst->Opcode);
+          return GL_TRUE;      /* return value doesn't matter */
+       }
+
+       /* Bail out if the program has run for too long; report only once */
+       numExec++;
+       if (numExec > maxExec) {
++         static GLboolean reported = GL_FALSE;
++         if (!reported) {
++            _mesa_problem(ctx, "Infinite loop detected in fragment program");
++            reported = GL_TRUE;
++         }
+          return GL_TRUE;
+       }
+
+    } /* for pc */
+
+    return GL_TRUE;
+ }
--- /dev/null
- * For BRK, points to BGNLOOP (which points to ENDLOOP).
+ /*
+ * Mesa 3-D graphics library
+ * Version: 7.3
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+ /**
+ * \file prog_instruction.h
+ *
+ * Vertex/fragment program instruction datatypes and constants.
+ *
+ * \author Brian Paul
+ * \author Keith Whitwell
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+
+ #ifndef PROG_INSTRUCTION_H
+ #define PROG_INSTRUCTION_H
+
+
+ #include "main/mfeatures.h"
+
+
+ /**
+ * Swizzle indexes.
+ * Do not change!
+ */
+ /*@{*/
+ #define SWIZZLE_X 0
+ #define SWIZZLE_Y 1
+ #define SWIZZLE_Z 2
+ #define SWIZZLE_W 3
+ #define SWIZZLE_ZERO 4 /**< For SWZ instruction only */
+ #define SWIZZLE_ONE 5 /**< For SWZ instruction only */
+ #define SWIZZLE_NIL 7 /**< used during shader code gen (undefined value) */
+ /*@}*/
+
+ /** Pack four 3-bit swizzle selectors into one value; selector 'a' occupies the lowest bits. */
+ #define MAKE_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+ /** Packed swizzle with selectors 0,1,2,3 (the SWIZZLE_X..SWIZZLE_W values, in order). */
+ #define SWIZZLE_NOOP MAKE_SWIZZLE4(0,1,2,3)
+ /** Extract the 3-bit selector stored for component 'idx' of packed swizzle 'swz'. */
+ #define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+ /** Extract bit number 'idx' of 'msk' as 0 or 1. */
+ #define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+
+ #define SWIZZLE_XYZW MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)
+ #define SWIZZLE_XXXX MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)
+ #define SWIZZLE_YYYY MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)
+ #define SWIZZLE_ZZZZ MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)
+ #define SWIZZLE_WWWW MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)
+
+
+ /**
+ * Writemask values, 1 bit per component.
+ */
+ /*@{*/
+ #define WRITEMASK_X 0x1
+ #define WRITEMASK_Y 0x2
+ #define WRITEMASK_XY 0x3
+ #define WRITEMASK_Z 0x4
+ #define WRITEMASK_XZ 0x5
+ #define WRITEMASK_YZ 0x6
+ #define WRITEMASK_XYZ 0x7
+ #define WRITEMASK_W 0x8
+ #define WRITEMASK_XW 0x9
+ #define WRITEMASK_YW 0xa
+ #define WRITEMASK_XYW 0xb
+ #define WRITEMASK_ZW 0xc
+ #define WRITEMASK_XZW 0xd
+ #define WRITEMASK_YZW 0xe
+ #define WRITEMASK_XYZW 0xf
+ /*@}*/
+
+
+ /**
+ * Condition codes
+ */
+ /*@{*/
+ #define COND_GT 1 /**< greater than zero */
+ #define COND_EQ 2 /**< equal to zero */
+ #define COND_LT 3 /**< less than zero */
+ #define COND_UN 4 /**< unordered (NaN) */
+ #define COND_GE 5 /**< greater than or equal to zero */
+ #define COND_LE 6 /**< less than or equal to zero */
+ #define COND_NE 7 /**< not equal to zero */
+ #define COND_TR 8 /**< always true */
+ #define COND_FL 9 /**< always false */
+ /*@}*/
+
+
+ /**
+ * Instruction precision for GL_NV_fragment_program
+ */
+ /*@{*/
+ #define FLOAT32 0x1
+ #define FLOAT16 0x2
+ #define FIXED12 0x4
+ /*@}*/
+
+
+ /**
+ * Saturation modes when storing values.
+ */
+ /*@{*/
+ #define SATURATE_OFF 0
+ #define SATURATE_ZERO_ONE 1
+ /*@}*/
+
+
+ /**
+ * Per-component negation masks
+ */
+ /*@{*/
+ #define NEGATE_X 0x1
+ #define NEGATE_Y 0x2
+ #define NEGATE_Z 0x4
+ #define NEGATE_W 0x8
+ #define NEGATE_XYZ 0x7
+ #define NEGATE_XYZW 0xf
+ #define NEGATE_NONE 0x0
+ /*@}*/
+
+
+ /**
+ * Program instruction opcodes for vertex, fragment and geometry programs.
+ */
+ typedef enum prog_opcode {
+ /* ARB_vp ARB_fp NV_vp NV_fp GLSL */
+ /*------------------------------------------*/
+ OPCODE_NOP = 0, /* X */
+ OPCODE_ABS, /* X X 1.1 X */
+ OPCODE_ADD, /* X X X X X */
+ OPCODE_AND, /* */
+ OPCODE_ARA, /* 2 */
+ OPCODE_ARL, /* X X */
+ OPCODE_ARL_NV, /* 2 */
+ OPCODE_ARR, /* 2 */
+ OPCODE_BGNLOOP, /* opt */
+ OPCODE_BGNSUB, /* opt */
+ OPCODE_BRA, /* 2 X */
+ OPCODE_BRK, /* 2 opt */
+ OPCODE_CAL, /* 2 2 */
+ OPCODE_CMP, /* X */
+ OPCODE_CONT, /* opt */
+ OPCODE_COS, /* X 2 X X */
+ OPCODE_DDX, /* X X */
+ OPCODE_DDY, /* X X */
+ OPCODE_DP2, /* 2 */
+ OPCODE_DP2A, /* 2 */
+ OPCODE_DP3, /* X X X X X */
+ OPCODE_DP4, /* X X X X X */
+ OPCODE_DPH, /* X X 1.1 */
+ OPCODE_DST, /* X X X X */
+ OPCODE_ELSE, /* X */
+ OPCODE_EMIT_VERTEX,/* X */
+ OPCODE_END, /* X X X X opt */
+ OPCODE_END_PRIMITIVE,/* X */
+ OPCODE_ENDIF, /* opt */
+ OPCODE_ENDLOOP, /* opt */
+ OPCODE_ENDSUB, /* opt */
+ OPCODE_EX2, /* X X 2 X X */
+ OPCODE_EXP, /* X X X */
+ OPCODE_FLR, /* X X 2 X X */
+ OPCODE_FRC, /* X X 2 X X */
+ OPCODE_IF, /* opt */
+ OPCODE_KIL, /* X */
+ OPCODE_KIL_NV, /* X X */
+ OPCODE_LG2, /* X X 2 X X */
+ OPCODE_LIT, /* X X X X */
+ OPCODE_LOG, /* X X X */
+ OPCODE_LRP, /* X X */
+ OPCODE_MAD, /* X X X X X */
+ OPCODE_MAX, /* X X X X X */
+ OPCODE_MIN, /* X X X X X */
+ OPCODE_MOV, /* X X X X X */
+ OPCODE_MUL, /* X X X X X */
+ OPCODE_NOISE1, /* X */
+ OPCODE_NOISE2, /* X */
+ OPCODE_NOISE3, /* X */
+ OPCODE_NOISE4, /* X */
+ OPCODE_NOT, /* */
+ OPCODE_NRM3, /* */
+ OPCODE_NRM4, /* */
+ OPCODE_OR, /* */
+ OPCODE_PK2H, /* X */
+ OPCODE_PK2US, /* X */
+ OPCODE_PK4B, /* X */
+ OPCODE_PK4UB, /* X */
+ OPCODE_POW, /* X X X X */
+ OPCODE_POPA, /* 3 */
+ OPCODE_PRINT, /* X X */
+ OPCODE_PUSHA, /* 3 */
+ OPCODE_RCC, /* 1.1 */
+ OPCODE_RCP, /* X X X X X */
+ OPCODE_RET, /* 2 2 */
+ OPCODE_RFL, /* X X */
+ OPCODE_RSQ, /* X X X X X */
+ OPCODE_SCS, /* X */
+ OPCODE_SEQ, /* 2 X X */
+ OPCODE_SFL, /* 2 X */
+ OPCODE_SGE, /* X X X X X */
+ OPCODE_SGT, /* 2 X X */
+ OPCODE_SIN, /* X 2 X X */
+ OPCODE_SLE, /* 2 X X */
+ OPCODE_SLT, /* X X X X X */
+ OPCODE_SNE, /* 2 X X */
+ OPCODE_SSG, /* 2 */
+ OPCODE_STR, /* 2 X */
+ OPCODE_SUB, /* X X 1.1 X X */
+ OPCODE_SWZ, /* X X */
+ OPCODE_TEX, /* X 3 X X */
+ OPCODE_TXB, /* X 3 X */
+ OPCODE_TXD, /* X X */
+ OPCODE_TXL, /* 3 2 X */
+ OPCODE_TXP, /* X X */
+ OPCODE_TXP_NV, /* 3 X */
+ OPCODE_TRUNC, /* X */
+ OPCODE_UP2H, /* X */
+ OPCODE_UP2US, /* X */
+ OPCODE_UP4B, /* X */
+ OPCODE_UP4UB, /* X */
+ OPCODE_X2D, /* X */
+ OPCODE_XOR, /* */
+ OPCODE_XPD, /* X X X */
+ MAX_OPCODE
+ } gl_inst_opcode;
+
+
+ /**
+ * Number of bits for the src/dst register Index field.
+ * This limits the size of temp/uniform register files.
+ */
+ #define INST_INDEX_BITS 10
+
+
+ /**
+ * Instruction source register.
+ */
+ struct prog_src_register
+ {
+ GLuint File:4; /**< One of the PROGRAM_* register file values. */
+ GLint Index:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
+ * May be negative for relative addressing.
+ */
+ GLuint Swizzle:12;
+ GLuint RelAddr:1;
+
+ /** Take the component-wise absolute value */
+ GLuint Abs:1;
+
+ /**
+ * Post-Abs negation.
+ * This will either be NEGATE_NONE or NEGATE_XYZW, except for the SWZ
+ * instruction which allows per-component negation.
+ */
+ GLuint Negate:4;
+
+ /**
+ * Is the register two-dimensional.
+ * Two dimensional registers are of the
+ * REGISTER[index][index2] format.
+ * They are used by the geometry shaders where
+ * the first index is the index within an array
+ * and the second index is the semantic of the
+ * array, e.g. gl_PositionIn[index] would become
+ * INPUT[index][gl_PositionIn]
+ */
+ GLuint HasIndex2:1;
+ GLuint RelAddr2:1;
+ GLint Index2:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
+ * May be negative for relative
+ * addressing. */
+ };
+
+
+ /**
+ * Instruction destination register.
+ */
+ struct prog_dst_register
+ {
+ GLuint File:4; /**< One of the PROGRAM_* register file values */
+ GLuint Index:INST_INDEX_BITS; /**< Unsigned, never negative */
+ GLuint WriteMask:4; /**< Bitmask of WRITEMASK_* values, 1 bit per component */
+ GLuint RelAddr:1; /**< NOTE(review): presumably set for relative (indirect) addressing -- confirm */
+
+ /**
+ * \name Conditional destination update control.
+ *
+ * \since
+ * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
+ * NV_vertex_program2_option.
+ */
+ /*@{*/
+ /**
+ * Takes one of the 9 possible condition values (EQ, FL, GT, GE, LE, LT,
+ * NE, TR, or UN). Dest reg is only written to if the matching
+ * (swizzled) condition code value passes. When a conditional update mask
+ * is not specified, this will be \c COND_TR.
+ */
+ GLuint CondMask:4;
+
+ /**
+ * Condition code swizzle value.
+ */
+ GLuint CondSwizzle:12;
+
+ /**
+ * Selects the condition code register to use for conditional destination
+ * update masking. In NV_fragment_program or NV_vertex_program2 mode, only
+ * condition code register 0 is available. In NV_vertex_program3 mode,
+ * condition code registers 0 and 1 are available.
+ */
+ GLuint CondSrc:1;
+ /*@}*/
+ };
+
+
+ /**
+ * Vertex/fragment program instruction.
+ */
+ struct prog_instruction
+ {
+ gl_inst_opcode Opcode;
+ struct prog_src_register SrcReg[3];
+ struct prog_dst_register DstReg;
+
+ /**
+ * Indicates that the instruction should update the condition code
+ * register.
+ *
+ * \since
+ * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
+ * NV_vertex_program2_option.
+ */
+ GLuint CondUpdate:1;
+
+ /**
+ * If prog_instruction::CondUpdate is \c GL_TRUE, this value selects the
+ * condition code register that is to be updated.
+ *
+ * In GL_NV_fragment_program or GL_NV_vertex_program2 mode, only condition
+ * code register 0 is available. In GL_NV_vertex_program3 mode, condition
+ * code registers 0 and 1 are available.
+ *
+ * \since
+ * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
+ * NV_vertex_program2_option.
+ */
+ GLuint CondDst:1;
+
+ /**
+ * Saturate each value of the vectored result to the range [0,1] or the
+ * range [-1,1]. \c SSAT mode (i.e., saturation to the range [-1,1]) is
+ * only available in NV_fragment_program2 mode.
+ * Value is one of the SATURATE_* tokens.
+ *
+ * \since
+ * NV_fragment_program, NV_fragment_program_option, NV_vertex_program3.
+ */
+ GLuint SaturateMode:2;
+
+ /**
+ * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12.
+ *
+ * \since
+ * NV_fragment_program, NV_fragment_program_option.
+ */
+ GLuint Precision:3;
+
+ /**
+ * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+ */
+ /*@{*/
+ /** Source texture unit. */
+ GLuint TexSrcUnit:5;
+
+ /** Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX */
+ GLuint TexSrcTarget:3;
+
+ /** True if tex instruction should do shadow comparison */
+ GLuint TexShadow:1;
+ /*@}*/
+
+ /**
+ * For BRA and CAL instructions, the location to jump to.
+ * For BGNLOOP, points to ENDLOOP (and vice-versa).
++ * For BRK, points to ENDLOOP
+ * For IF, points to ELSE or ENDIF.
+ * For ELSE, points to ENDIF.
+ */
+ GLint BranchTarget;
+
+ /** for debugging purposes */
+ const char *Comment;
+
+ /** Arbitrary data. Used for OPCODE_PRINT and some drivers */
+ void *Data;
+
+ /** for driver use (try to remove someday) */
+ GLint Aux;
+ };
+
+
+ extern void
+ _mesa_init_instructions(struct prog_instruction *inst, GLuint count);
+
+ extern struct prog_instruction *
+ _mesa_alloc_instructions(GLuint numInst);
+
+ extern struct prog_instruction *
+ _mesa_realloc_instructions(struct prog_instruction *oldInst,
+ GLuint numOldInst, GLuint numNewInst);
+
+ extern struct prog_instruction *
+ _mesa_copy_instructions(struct prog_instruction *dest,
+ const struct prog_instruction *src, GLuint n);
+
+ extern void
+ _mesa_free_instructions(struct prog_instruction *inst, GLuint count);
+
+ extern GLuint
+ _mesa_num_inst_src_regs(gl_inst_opcode opcode);
+
+ extern GLuint
+ _mesa_num_inst_dst_regs(gl_inst_opcode opcode);
+
+ extern GLboolean
+ _mesa_is_tex_instruction(gl_inst_opcode opcode);
+
+ extern GLboolean
+ _mesa_check_soa_dependencies(const struct prog_instruction *inst);
+
+ extern const char *
+ _mesa_opcode_string(gl_inst_opcode opcode);
+
+
+ #endif /* PROG_INSTRUCTION_H */
--- /dev/null
-update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
+ /*
+ * Mesa 3-D graphics library
+ * Version: 7.5
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+ #include "main/glheader.h"
+ #include "main/context.h"
+ #include "main/macros.h"
+ #include "program.h"
+ #include "prog_instruction.h"
+ #include "prog_optimize.h"
+ #include "prog_print.h"
+
+
+ #define MAX_LOOP_NESTING 50
+
+
+ static GLboolean dbg = GL_FALSE;
+
+ /**
+  * Compute which channels of a source operand the instruction reads.
+  * The result depends on the opcode and, for component-wise opcodes, on the
+  * destination write mask; the operand number 'arg' is not consulted.
+  * \return a WRITEMASK_* bitmask
+  */
+ static GLuint
+ get_src_arg_mask(const struct prog_instruction *inst, int arg)
+ {
+    /* An instruction that updates condition codes is treated as writing
+     * every channel, whatever its write mask says.
+     */
+    const int dstMask = inst->CondUpdate ? WRITEMASK_XYZW
+                                         : (int) inst->DstReg.WriteMask;
+    GLuint readMask;
+
+    (void) arg;
+
+    switch (inst->Opcode) {
+    /* component-wise: reads exactly what gets written */
+    case OPCODE_ABS:
+    case OPCODE_ADD:
+    case OPCODE_MOV:
+    case OPCODE_MUL:
+    case OPCODE_SUB:
+       readMask = dstMask;
+       break;
+    /* scalar: reads the X channel only */
+    case OPCODE_COS:
+    case OPCODE_EX2:
+    case OPCODE_POW:
+    case OPCODE_RCP:
+    case OPCODE_RSQ:
+    case OPCODE_SIN:
+       readMask = WRITEMASK_X;
+       break;
+    case OPCODE_DP2:
+       readMask = WRITEMASK_XY;
+       break;
+    case OPCODE_DP3:
+    case OPCODE_XPD:
+       readMask = WRITEMASK_XYZ;
+       break;
+    /* anything else: assume all four channels are read */
+    default:
+       readMask = WRITEMASK_XYZW;
+       break;
+    }
+
+    return readMask;
+ }
+
+ /**
+  * Delete from 'prog' every instruction i for which removeFlags[i] is set.
+  * Consecutive flagged instructions are deleted with a single
+  * _mesa_delete_instructions() call.
+  * \return number of instructions removed
+  */
+ static GLuint
+ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
+ {
+    GLuint numDeleted = 0;
+    GLint runLen = 0;   /* flagged instructions seen directly after 'pc' */
+    GLint pc;
+
+    /* Walk from the last instruction to the first so that deleting a run
+     * never shifts the instructions that are still to be visited.
+     */
+    for (pc = prog->NumInstructions - 1; pc >= 0; pc--) {
+       if (removeFlags[pc]) {
+          numDeleted++;
+          runLen++;
+          continue;
+       }
+
+       /* A kept instruction ends the pending run, which starts at pc + 1. */
+       if (runLen > 0) {
+          _mesa_delete_instructions(prog, pc + 1, runLen);
+          runLen = 0;
+       }
+    }
+
+    /* A run that reaches the first instruction is still pending here. */
+    if (runLen > 0) {
+       _mesa_delete_instructions(prog, 0, runLen);
+    }
+
+    return numDeleted;
+ }
+
+
+ /**
+  * Renumber the registers of one register file throughout a program.
+  * \param prog  the program whose instructions are rewritten in place
+  * \param file  only operands in this register file are touched
+  * \param map  new index for each old index; entries that are used must be >= 0
+  */
+ static void
+ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
+ {
+    GLuint pc;
+
+    for (pc = 0; pc < prog->NumInstructions; pc++) {
+       struct prog_instruction *inst = &prog->Instructions[pc];
+       const GLuint srcCount = _mesa_num_inst_src_regs(inst->Opcode);
+       GLuint s;
+
+       /* source operands */
+       for (s = 0; s < srcCount; s++) {
+          struct prog_src_register *src = &inst->SrcReg[s];
+          if (src->File == file) {
+             GLuint oldIndex = src->Index;
+             ASSERT(map[oldIndex] >= 0);
+             src->Index = map[oldIndex];
+          }
+       }
+
+       /* destination operand */
+       if (inst->DstReg.File == file) {
+          const GLuint oldIndex = inst->DstReg.Index;
+          ASSERT(map[oldIndex] >= 0);
+          inst->DstReg.Index = map[oldIndex];
+       }
+    }
+ }
+
+
+ /**
+  * Consolidate temporary registers to use low numbers.  For example, if the
+  * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
+  *
+  * Every temporary named by any source or destination operand is recorded,
+  * then the used temporaries are renumbered densely in ascending order.
+  * The program is left untouched when the temps already form 0..N.
+  */
+ static void
+ _mesa_consolidate_registers(struct gl_program *prog)
+ {
+    GLboolean tempUsed[MAX_PROGRAM_TEMPS];
+    GLint tempMap[MAX_PROGRAM_TEMPS];
+    GLuint tempMax = 0, i;
+
+    if (dbg) {
+       printf("Optimize: Begin register consolidation\n");
+    }
+
+    memset(tempUsed, 0, sizeof(tempUsed));
+
+    /* -1 marks "no new index assigned" */
+    for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+       tempMap[i] = -1;
+    }
+
+    /* set tempUsed[i] if temporary [i] is referenced */
+    for (i = 0; i < prog->NumInstructions; i++) {
+       const struct prog_instruction *inst = prog->Instructions + i;
+       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+       GLuint j;
+       for (j = 0; j < numSrc; j++) {
+          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+             const GLuint index = inst->SrcReg[j].Index;
+             ASSERT(index < MAX_PROGRAM_TEMPS);
+             tempUsed[index] = GL_TRUE;
+             tempMax = MAX2(tempMax, index);
+             /* Keep scanning: the remaining operands may name other
+              * temporaries.  Stopping at the first one would leave a temp
+              * that is only ever read as a later operand without a map
+              * entry, and replace_regs() would then store -1 as its index.
+              */
+          }
+       }
+       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+          const GLuint index = inst->DstReg.Index;
+          ASSERT(index < MAX_PROGRAM_TEMPS);
+          tempUsed[index] = GL_TRUE;
+          tempMax = MAX2(tempMax, index);
+       }
+    }
+
+    /* allocate a new index for each temp that's used */
+    {
+       GLuint freeTemp = 0;
+       for (i = 0; i <= tempMax; i++) {
+          if (tempUsed[i]) {
+             tempMap[i] = freeTemp++;
+             /*printf("replace %u with %u\n", i, tempMap[i]);*/
+          }
+       }
+       if (freeTemp == tempMax + 1) {
+          /* no consolidation possible */
+          return;
+       }
+       if (dbg) {
+          printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1);
+       }
+    }
+
+    replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
+
+    if (dbg) {
+       printf("Optimize: End register consolidation\n");
+    }
+ }
+
+
+ /**
+  * Remove dead instructions from the given program.
+  * This is very primitive for now.  Basically look for temp registers
+  * that are written to but never read.  Remove any instructions that
+  * write to such registers.  Be careful with condition code setters.
+  *
+  * Works per channel: write-mask bits for channels that are never read are
+  * cleared, and an instruction whose write mask becomes empty is deleted.
+  * The pass gives up without changing anything if a temporary is accessed
+  * with relative addressing, or if the scratch flag array cannot be
+  * allocated.
+  */
+ static void
+ _mesa_remove_dead_code(struct gl_program *prog)
+ {
+    GLboolean tempRead[MAX_PROGRAM_TEMPS][4];
+    GLboolean *removeInst; /* per-instruction removal flag */
+    GLuint i, comp;
+    GLuint chanRemoved = 0;  /* write-mask bits cleared */
+    GLuint instRemoved;      /* whole instructions deleted */
+
+    memset(tempRead, 0, sizeof(tempRead));
+
+    if (dbg) {
+       printf("Optimize: Begin dead code removal\n");
+       /*_mesa_print_program(prog);*/
+    }
+
+    removeInst = (GLboolean *)
+       calloc(1, prog->NumInstructions * sizeof(GLboolean));
+    if (!removeInst) {
+       /* Allocation failed (or there is nothing to scan): the optimization
+        * is optional, so skip it rather than dereference NULL below.
+        */
+       return;
+    }
+
+    /* Determine which temps are read and written */
+    for (i = 0; i < prog->NumInstructions; i++) {
+       const struct prog_instruction *inst = prog->Instructions + i;
+       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+       GLuint j;
+
+       /* check src regs */
+       for (j = 0; j < numSrc; j++) {
+          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+             const GLuint index = inst->SrcReg[j].Index;
+             GLuint read_mask;
+             ASSERT(index < MAX_PROGRAM_TEMPS);
+             read_mask = get_src_arg_mask(inst, j);
+
+             if (inst->SrcReg[j].RelAddr) {
+                if (dbg)
+                   printf("abort remove dead code (indirect temp)\n");
+                goto done;
+             }
+
+             for (comp = 0; comp < 4; comp++) {
+                const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
+
+                if ((read_mask & (1 << comp)) == 0)
+                   continue;
+
+                /* SWIZZLE_X..SWIZZLE_W are 0..3 and name a real channel;
+                 * the other selectors do not read the register.
+                 */
+                if (swz <= SWIZZLE_W)
+                   tempRead[index][swz] = GL_TRUE;
+             }
+          }
+       }
+
+       /* check dst reg */
+       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+          const GLuint index = inst->DstReg.Index;
+          ASSERT(index < MAX_PROGRAM_TEMPS);
+
+          if (inst->DstReg.RelAddr) {
+             if (dbg)
+                printf("abort remove dead code (indirect temp)\n");
+             goto done;
+          }
+
+          if (inst->CondUpdate) {
+             /* If we're writing to this register and setting condition
+              * codes we cannot remove the instruction.  Prevent removal
+              * by setting the 'read' flag.
+              */
+             tempRead[index][0] = GL_TRUE;
+             tempRead[index][1] = GL_TRUE;
+             tempRead[index][2] = GL_TRUE;
+             tempRead[index][3] = GL_TRUE;
+          }
+       }
+    }
+
+    /* find instructions that write to dead registers, flag for removal */
+    for (i = 0; i < prog->NumInstructions; i++) {
+       struct prog_instruction *inst = prog->Instructions + i;
+       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
+
+       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
+          GLint chan, index = inst->DstReg.Index;
+
+          for (chan = 0; chan < 4; chan++) {
+             if (!tempRead[index][chan] &&
+                 inst->DstReg.WriteMask & (1 << chan)) {
+                if (dbg) {
+                   printf("Remove writemask on %u.%c\n", i,
+                          chan == 3 ? 'w' : 'x' + chan);
+                }
+                inst->DstReg.WriteMask &= ~(1 << chan);
+                chanRemoved++;
+             }
+          }
+
+          if (inst->DstReg.WriteMask == 0) {
+             /* If we cleared all writes, the instruction can be removed. */
+             if (dbg)
+                printf("Remove instruction %u: \n", i);
+             removeInst[i] = GL_TRUE;
+          }
+       }
+    }
+
+    /* now remove the instructions which aren't needed */
+    instRemoved = remove_instructions(prog, removeInst);
+
+    if (dbg) {
+       printf("Optimize: End dead code removal.\n");
+       printf(" %u channel writes removed\n", chanRemoved);
+       printf(" %u instructions removed\n", instRemoved);
+       /*_mesa_print_program(prog);*/
+    }
+
+ done:
+    free(removeInst);
+ }
+
+
+ /** Result of find_next_temp_use(): what happens next to a temporary. */
+ enum temp_use
+ {
+ READ, /**< next reference is as a source operand */
+ WRITE, /**< next reference is as the destination operand */
+ FLOW, /**< a BGNLOOP/ENDLOOP/BGNSUB/ENDSUB was reached first */
+ END /**< end of program reached without another reference */
+ };
+
+ /**
+  * Scan forward in program from 'start' for the next occurrence of
+  * TEMP[index].
+  * \return READ or WRITE for the kind of the next reference, FLOW if a loop
+  *         or subroutine boundary is reached first (we can't look further),
+  *         or END if the program ends without another reference.
+  */
+ static enum temp_use
+ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
+ {
+    GLuint pc;
+
+    for (pc = start; pc < prog->NumInstructions; pc++) {
+       const struct prog_instruction *inst = &prog->Instructions[pc];
+       GLuint numSrc, s;
+
+       /* Loop and subroutine delimiters end the scan. */
+       if (inst->Opcode == OPCODE_BGNLOOP ||
+           inst->Opcode == OPCODE_ENDLOOP ||
+           inst->Opcode == OPCODE_BGNSUB ||
+           inst->Opcode == OPCODE_ENDSUB) {
+          return FLOW;
+       }
+
+       /* Sources are examined before the destination, so an instruction
+        * that both reads and writes the temp reports READ.
+        */
+       numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+       for (s = 0; s < numSrc; s++) {
+          if (inst->SrcReg[s].File == PROGRAM_TEMPORARY &&
+              inst->SrcReg[s].Index == index) {
+             return READ;
+          }
+       }
+
+       if (inst->DstReg.File == PROGRAM_TEMPORARY &&
+           inst->DstReg.Index == index) {
+          return WRITE;
+       }
+    }
+
+    return END;
+ }
+
+ /**
+  * Tell whether 'opcode' is one of the opcodes this file treats as flow
+  * control (loop, subroutine, branch, call/return, IF/ELSE/ENDIF, END).
+  * \return GL_TRUE for those opcodes, GL_FALSE for every other opcode
+  */
+ static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+ {
+    GLboolean isFlow = GL_FALSE;
+
+    switch (opcode) {
+    /* loops */
+    case OPCODE_BGNLOOP:
+    case OPCODE_CONT:
+    case OPCODE_ENDLOOP:
+    /* subroutines */
+    case OPCODE_BGNSUB:
+    case OPCODE_CAL:
+    case OPCODE_ENDSUB:
+    case OPCODE_RET:
+    /* conditionals and branches */
+    case OPCODE_BRA:
+    case OPCODE_ELSE:
+    case OPCODE_ENDIF:
+    case OPCODE_IF:
+    /* program end */
+    case OPCODE_END:
+       isFlow = GL_TRUE;
+       break;
+    default:
+       break;
+    }
+
+    return isFlow;
+ }
+
+ /**
+ * Try to remove use of extraneous MOV instructions, to free them up for dead
+ * code removal.
+ *
+ * For each qualifying MOV into a temporary, later reads of that temporary
+ * are rewritten to read the MOV's source directly. The MOV itself is left
+ * in place; nothing is deleted here.
+ */
+ static void
+ _mesa_remove_extra_move_use(struct gl_program *prog)
+ {
+ GLuint i, j;
+
+ if (dbg) {
+ printf("Optimize: Begin remove extra move use\n");
+ _mesa_print_program(prog);
+ }
+
+ /*
+ * Look for sequences such as this:
+ * MOV tmpX, arg0;
+ * ...
+ * FOO tmpY, tmpX, arg1;
+ * and convert into:
+ * MOV tmpX, arg0;
+ * ...
+ * FOO tmpY, arg0, arg1;
+ */
+
+ for (i = 0; i + 1 < prog->NumInstructions; i++) {
+ const struct prog_instruction *mov = prog->Instructions + i;
+
+ /* Only an unconditional, unsaturated MOV into a directly addressed
+ * temporary from a directly addressed source qualifies.
+ * NOTE(review): mov->SrcReg[0].Abs is not examined here or below; if a
+ * MOV source can carry Abs, forwarding would drop it -- confirm.
+ */
+ if (mov->Opcode != OPCODE_MOV ||
+ mov->DstReg.File != PROGRAM_TEMPORARY ||
+ mov->DstReg.RelAddr ||
+ mov->DstReg.CondMask != COND_TR ||
+ mov->SaturateMode != SATURATE_OFF ||
+ mov->SrcReg[0].RelAddr)
+ continue;
+
+ /* Walk through remaining instructions until the MOV's dst or src reg
+ * gets rewritten or we get into some flow-control, eliminating the use
+ * of this MOV.
+ */
+ for (j = i + 1; j < prog->NumInstructions; j++) {
+ struct prog_instruction *inst2 = prog->Instructions + j;
+ GLuint arg;
+
+ if (_mesa_is_flow_control_opcode(inst2->Opcode))
+ break;
+
+ /* First rewrite this instruction's args if appropriate. */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
+ int comp;
+ int read_mask = get_src_arg_mask(inst2, arg);
+
+ /* Skip operands that are not a plain read of the MOV's destination
+ * (different register, relative addressing, or Abs applied).
+ */
+ if (inst2->SrcReg[arg].File != mov->DstReg.File ||
+ inst2->SrcReg[arg].Index != mov->DstReg.Index ||
+ inst2->SrcReg[arg].RelAddr ||
+ inst2->SrcReg[arg].Abs)
+ continue;
+
+ /* Check that all the sources for this arg of inst2 come from inst1
+ * or constants.
+ */
+ for (comp = 0; comp < 4; comp++) {
+ int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+
+ /* If the MOV didn't write that channel, can't use it. */
+ if ((read_mask & (1 << comp)) &&
+ src_swz <= SWIZZLE_W &&
+ (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
+ break;
+ }
+ /* comp != 4 means the loop above bailed out early */
+ if (comp != 4)
+ continue;
+
+ /* Adjust the swizzles of inst2 to point at MOV's source */
+ for (comp = 0; comp < 4; comp++) {
+ int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+
+ /* Selectors above SWIZZLE_W (ZERO/ONE/NIL) are left untouched. */
+ if (inst2_swz <= SWIZZLE_W) {
+ GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ /* Fold in the MOV's negate bit for the channel it selected. */
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
+ }
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ }
+
+ /* If this instruction overwrote part of the move, our time is up. */
+ if ((inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) ||
+ (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)))
+ break;
+ }
+ }
+
+ if (dbg) {
+ printf("Optimize: End remove extra move use.\n");
+ /*_mesa_print_program(prog);*/
+ }
+ }
+
+ /**
+  * Try to remove extraneous MOV instructions from the given program.
+  *
+  * A MOV that copies a temporary written in full by the preceding
+  * (not already removed) instruction is folded away by retargeting that
+  * instruction at the MOV's destination.  Only MOVs outside loops and
+  * subroutines are considered.  The pass is skipped if the scratch flag
+  * array cannot be allocated.
+  */
+ static void
+ _mesa_remove_extra_moves(struct gl_program *prog)
+ {
+    GLboolean *removeInst; /* per-instruction removal flag */
+    GLuint i, rem, loopNesting = 0, subroutineNesting = 0;
+
+    if (dbg) {
+       printf("Optimize: Begin remove extra moves\n");
+       _mesa_print_program(prog);
+    }
+
+    removeInst = (GLboolean *)
+       calloc(1, prog->NumInstructions * sizeof(GLboolean));
+    if (!removeInst) {
+       /* Allocation failed (or there is nothing to scan): the optimization
+        * is optional, so skip it rather than dereference NULL below.
+        */
+       return;
+    }
+
+    /*
+     * Look for sequences such as this:
+     *    FOO tmpX, arg0, arg1;
+     *    MOV tmpY, tmpX;
+     * and convert into:
+     *    FOO tmpY, arg0, arg1;
+     */
+
+    for (i = 0; i < prog->NumInstructions; i++) {
+       const struct prog_instruction *inst = prog->Instructions + i;
+
+       switch (inst->Opcode) {
+       case OPCODE_BGNLOOP:
+          loopNesting++;
+          break;
+       case OPCODE_ENDLOOP:
+          loopNesting--;
+          break;
+       case OPCODE_BGNSUB:
+          subroutineNesting++;
+          break;
+       case OPCODE_ENDSUB:
+          subroutineNesting--;
+          break;
+       case OPCODE_MOV:
+          if (i > 0 &&
+              loopNesting == 0 &&
+              subroutineNesting == 0 &&
+              inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
+              inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
+             /* see if this MOV can be removed */
+             const GLuint tempIndex = inst->SrcReg[0].Index;
+             struct prog_instruction *prevInst;
+             GLuint prevI;
+
+             /* get pointer to previous instruction, skipping any that
+              * are already flagged for removal
+              */
+             prevI = i - 1;
+             while (prevI > 0 && removeInst[prevI])
+                prevI--;
+             prevInst = prog->Instructions + prevI;
+
+             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
+                 prevInst->DstReg.Index == tempIndex &&
+                 prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {
+
+                enum temp_use next_use =
+                   find_next_temp_use(prog, i + 1, tempIndex);
+
+                /* The temp must not be read again before it is
+                 * overwritten or the program ends.
+                 */
+                if (next_use == WRITE || next_use == END) {
+                   /* OK, we can safely remove this MOV instruction.
+                    * Transform:
+                    *   prevI: FOO tempIndex, x, y;
+                    *       i: MOV z, tempIndex;
+                    * Into:
+                    *   prevI: FOO z, x, y;
+                    */
+
+                   /* patch up prev inst */
+                   prevInst->DstReg.File = inst->DstReg.File;
+                   prevInst->DstReg.Index = inst->DstReg.Index;
+
+                   /* flag this instruction for removal */
+                   removeInst[i] = GL_TRUE;
+
+                   if (dbg) {
+                      printf("Remove MOV at %u\n", i);
+                      printf("new prev inst %u: ", prevI);
+                      _mesa_print_instruction(prevInst);
+                   }
+                }
+             }
+          }
+          break;
+       default:
+          ; /* nothing */
+       }
+    }
+
+    /* now remove the instructions which aren't needed */
+    rem = remove_instructions(prog, removeInst);
+
+    free(removeInst);
+
+    if (dbg) {
+       printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
+       /*_mesa_print_program(prog);*/
+    }
+ }
+
+
+ /** A live register interval */
+ struct interval
+ {
+ GLuint Reg; /**< The temporary register index */
+ GLuint Start, End; /**< Start/end instruction numbers */
+ };
+
+
+ /** A list of register intervals */
+ struct interval_list
+ {
+ GLuint Num; /**< Number of entries of Intervals[] currently in use */
+ struct interval Intervals[MAX_PROGRAM_TEMPS]; /**< Fixed-capacity storage */
+ };
+
+
+ /** Copy *inv into the first unused slot of 'list'; no capacity check is made. */
+ static void
+ append_interval(struct interval_list *list, const struct interval *inv)
+ {
+    const GLuint slot = list->Num;
+
+    list->Intervals[slot] = *inv;
+    list->Num = slot + 1;
+ }
+
+
+ /**
+  * Insert a copy of *inv into 'list', keeping the list ordered by
+  * ascending interval End.
+  */
+ static void
+ insert_interval_by_end(struct interval_list *list, const struct interval *inv)
+ {
+    /* XXX a binary search could locate the slot since the list is sorted */
+    GLint slot;
+
+    /* Starting from the end, move every entry that ends later than 'inv'
+     * one place up; 'slot' finishes at the position left free.
+     */
+    for (slot = list->Num; slot > 0; slot--) {
+       if (list->Intervals[slot - 1].End <= inv->End)
+          break;
+       list->Intervals[slot] = list->Intervals[slot - 1];
+    }
+    list->Intervals[slot] = *inv;
+    list->Num++;
+
+ #ifdef DEBUG
+    {
+       /* verify the ordering invariant */
+       GLuint k;
+       for (k = 0; k + 1 < list->Num; k++) {
+          ASSERT(list->Intervals[k].End <= list->Intervals[k + 1].End);
+       }
+    }
+ #endif
+ }
+
+
+ /**
+  * Remove from 'list' the first entry whose register matches inv->Reg.
+  * The list is left unchanged if no entry matches.
+  */
+ static void
+ remove_interval(struct interval_list *list, const struct interval *inv)
+ {
+    /* XXX a binary search is possible since the list is sorted */
+    GLuint pos = 0;
+
+    while (pos < list->Num && list->Intervals[pos].Reg != inv->Reg)
+       pos++;
+
+    if (pos == list->Num)
+       return;  /* not present */
+
+    ASSERT(list->Intervals[pos].Start == inv->Start);
+    ASSERT(list->Intervals[pos].End == inv->End);
+
+    /* close the gap by moving the following entries down one place */
+    for (; pos + 1 < list->Num; pos++)
+       list->Intervals[pos] = list->Intervals[pos + 1];
+
+    list->Num--;
+ }
+
+
+ /**
+  * qsort() comparison callback ordering struct interval by ascending Start.
+  * \return -1, 0 or +1
+  */
+ static int
+ compare_start(const void *a, const void *b)
+ {
+    const GLuint startA = ((const struct interval *) a)->Start;
+    const GLuint startB = ((const struct interval *) b)->Start;
+
+    /* each comparison yields 0 or 1, so the difference is -1, 0 or +1 */
+    return (startA > startB) - (startA < startB);
+ }
+
+ /** Reorder the entries of 'list' in place by ascending interval Start. */
+ static void
+ sort_interval_list_by_start(struct interval_list *list)
+ {
+    qsort(list->Intervals, list->Num, sizeof(list->Intervals[0]),
+          compare_start);
+ #ifdef DEBUG
+    {
+       /* verify the result is ordered */
+       GLuint k;
+       for (k = 1; k < list->Num; k++) {
+          ASSERT(list->Intervals[k - 1].Start <= list->Intervals[k].Start);
+       }
+    }
+ #endif
+ }
+
++/**
++ * Instruction range of one loop, kept on a stack while scanning a program
++ * for temporary register intervals.
++ */
++struct loop_info
++{
++ GLuint Start, End; /**< Start, end instructions of loop */
++};
+
+ /**
+ * Update the intermediate interval info for register 'index' and
+ * instruction 'ic'.
+ *
+ * \param intBegin per-register first-reference instruction, -1 if none yet
+ * \param intEnd per-register last-reference instruction, -1 if none yet
+ * \param loopStack loops currently open; scanned from entry 0 upward
+ * \param loopStackDepth number of valid entries in loopStack
+ * \param index temporary register being referenced
+ * \param ic instruction number of the reference
+ */
+ static void
- struct loop_info
- {
- GLuint Start, End; /**< Start, end instructions of loop */
- };
++update_interval(GLint intBegin[], GLint intEnd[],
++ struct loop_info *loopStack, GLuint loopStackDepth,
++ GLuint index, GLuint ic)
+ {
++ int i;
++
++ /* If the register is used in a loop, extend its lifetime through the end
++ * of the outermost loop that doesn't contain its definition.
++ */
++ /* NOTE(review): intBegin[] is signed and Start is unsigned; presumably an
++ * unset entry (-1) converts to a huge value and never compares less than
++ * Start, so a first reference inside a loop is not extended -- confirm.
++ */
++ for (i = 0; i < loopStackDepth; i++) {
++ if (intBegin[index] < loopStack[i].Start) {
++ ic = loopStack[i].End;
++ break;
++ }
++ }
++
+ /* NOTE(review): this bounds check runs after intBegin[index] has already
+ * been read by the loop above.
+ */
+ ASSERT(index < MAX_PROGRAM_TEMPS);
+ if (intBegin[index] == -1) {
+ /* first reference: the interval starts and ends here */
+ ASSERT(intEnd[index] == -1);
+ intBegin[index] = intEnd[index] = ic;
+ }
+ else {
+ /* later reference: only the end of the interval moves */
+ intEnd[index] = ic;
+ }
+ }
+
+
+ /**
+  * Find first/last instruction that references each temporary register.
+  *
+  * \param instructions  the instruction array to scan
+  * \param numInstructions  number of entries in 'instructions'
+  * \param intBegin  returns, per temporary, the first referencing
+  *                  instruction, or -1 if the register is never referenced
+  * \param intEnd  returns, per temporary, the end of its interval, or -1
+  * \return GL_TRUE on success.  GL_FALSE if the program contains a CAL
+  *         instruction, a relatively addressed temporary, or loop nesting
+  *         that cannot be tracked (deeper than MAX_LOOP_NESTING, or an
+  *         ENDLOOP without a matching BGNLOOP).
+  */
+ GLboolean
+ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
+                           GLuint numInstructions,
+                           GLint intBegin[MAX_PROGRAM_TEMPS],
+                           GLint intEnd[MAX_PROGRAM_TEMPS])
+ {
- update_interval(intBegin, intEnd, index, i);
- if (loopStackDepth > 0) {
- /* extend temp register's interval to end of loop */
- GLuint loopEnd = loopStack[loopStackDepth - 1].End;
- update_interval(intBegin, intEnd, index, loopEnd);
- }
+    struct loop_info loopStack[MAX_LOOP_NESTING];
+    GLuint loopStackDepth = 0;
+    GLuint i;
+
+    for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+       intBegin[i] = intEnd[i] = -1;
+    }
+
+    /* Scan instructions looking for temporary registers */
+    for (i = 0; i < numInstructions; i++) {
+       const struct prog_instruction *inst = instructions + i;
+       if (inst->Opcode == OPCODE_BGNLOOP) {
+          if (loopStackDepth >= MAX_LOOP_NESTING) {
+             /* nesting too deep for loopStack[]; give up rather than
+              * write past the end of the array
+              */
+             return GL_FALSE;
+          }
+          loopStack[loopStackDepth].Start = i;
+          loopStack[loopStackDepth].End = inst->BranchTarget;
+          loopStackDepth++;
+       }
+       else if (inst->Opcode == OPCODE_ENDLOOP) {
+          if (loopStackDepth == 0) {
+             /* unbalanced ENDLOOP; the unsigned depth would wrap around */
+             return GL_FALSE;
+          }
+          loopStackDepth--;
+       }
+       else if (inst->Opcode == OPCODE_CAL) {
+          return GL_FALSE;
+       }
+       else {
+          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
+          GLuint j;
+          for (j = 0; j < numSrc; j++) {
+             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+                const GLuint index = inst->SrcReg[j].Index;
+                if (inst->SrcReg[j].RelAddr)
+                   return GL_FALSE;
- update_interval(intBegin, intEnd, index, i);
- if (loopStackDepth > 0) {
- /* extend temp register's interval to end of loop */
- GLuint loopEnd = loopStack[loopStackDepth - 1].End;
- update_interval(intBegin, intEnd, index, loopEnd);
- }
++               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
++                               index, i);
+             }
+          }
+          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+             const GLuint index = inst->DstReg.Index;
+             if (inst->DstReg.RelAddr)
+                return GL_FALSE;
++            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
++                            index, i);
+          }
+       }
+    }
+
+    return GL_TRUE;
+ }
+
+
+ /**
+  * Build the list of live intervals for the program's temporary registers.
+  * For register R, the interval [A,B] indicates that R is referenced
+  * from instruction A through instruction B.  The resulting list is
+  * sorted by interval start.
+  *
+  * Relative indexing into the TEMP register file and subroutine calls
+  * are not handled; both make _mesa_find_temp_intervals() fail.
+  *
+  * \param prog  program whose instructions are scanned
+  * \param liveIntervals  returns the intervals, one per referenced temp
+  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
+  */
+ static GLboolean
+ find_live_intervals(struct gl_program *prog,
+                     struct interval_list *liveIntervals)
+ {
+    GLint firstUse[MAX_PROGRAM_TEMPS], lastUse[MAX_PROGRAM_TEMPS];
+    GLuint n;
+
+    if (dbg) {
+       printf("Optimize: Begin find intervals\n");
+    }
+
+    /* per-register first/last referencing instruction */
+    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
+                                   firstUse, lastUse)) {
+       return GL_FALSE;
+    }
+
+    /* one list entry for every register that is referenced at all */
+    liveIntervals->Num = 0;
+    for (n = 0; n < MAX_PROGRAM_TEMPS; n++) {
+       struct interval entry;
+
+       if (firstUse[n] < 0)
+          continue;
+
+       entry.Reg = n;
+       entry.Start = firstUse[n];
+       entry.End = lastUse[n];
+       append_interval(liveIntervals, &entry);
+    }
+
+    sort_interval_list_by_start(liveIntervals);
+
+    if (dbg) {
+       /* one line per interval, followed by a bar showing its extent */
+       for (n = 0; n < liveIntervals->Num; n++) {
+          const struct interval *cur = &liveIntervals->Intervals[n];
+          GLuint col;
+
+          printf("Reg[%d] live [%d, %d]:",
+                 cur->Reg, cur->Start, cur->End);
+          for (col = 0; col < cur->Start; col++)
+             printf(" ");
+          for (col = cur->Start; col <= cur->End; col++)
+             printf("x");
+          printf("\n");
+       }
+    }
+
+    return GL_TRUE;
+ }
+
+
+ /**
+  * Claim the lowest-numbered register that is not marked as used.
+  * \param usedRegs  per-register in-use flags; the chosen entry is set
+  *                  to GL_TRUE
+  * \return index of the claimed register, or -1 if every entry is in use
+  */
+ static GLint
+ alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS])
+ {
+    GLuint reg = 0;
+
+    /* skip over registers that are already taken */
+    while (reg < MAX_PROGRAM_TEMPS && usedRegs[reg])
+       reg++;
+
+    if (reg == MAX_PROGRAM_TEMPS)
+       return -1;
+
+    usedRegs[reg] = GL_TRUE;
+    return reg;
+ }
+
+
+ /**
+ * This function implements "Linear Scan Register Allocation" to reduce
+ * the number of temporary registers used by the program.
+ *
+ * We compute the "live interval" for all temporary registers then
+ * examine the overlap of the intervals to allocate new registers.
+ * Basically, if two intervals do not overlap, they can use the same register.
+ *
+ * The program is left untouched if the live intervals cannot be computed,
+ * if no free register can be found for some interval, or if the new
+ * assignment would not use fewer registers than there are live intervals.
+ */
+ static void
+ _mesa_reallocate_registers(struct gl_program *prog)
+ {
+ struct interval_list liveIntervals;
+ GLint registerMap[MAX_PROGRAM_TEMPS];
+ GLboolean usedRegs[MAX_PROGRAM_TEMPS];
+ GLuint i;
+ GLint maxTemp = -1;
+
+ if (dbg) {
+ printf("Optimize: Begin live-interval register reallocation\n");
+ _mesa_print_program(prog);
+ }
+
+ /* -1 in registerMap marks an old register with no new assignment yet */
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ registerMap[i] = -1;
+ usedRegs[i] = GL_FALSE;
+ }
+
+ if (!find_live_intervals(prog, &liveIntervals)) {
+ if (dbg)
+ printf("Aborting register reallocation\n");
+ return;
+ }
+
+ {
+ struct interval_list activeIntervals;
+ activeIntervals.Num = 0;
+
+ /* loop over live intervals, allocating a new register for each */
+ for (i = 0; i < liveIntervals.Num; i++) {
+ const struct interval *live = liveIntervals.Intervals + i;
+
+ if (dbg)
+ printf("Consider register %u\n", live->Reg);
+
+ /* Expire old intervals. Intervals which have ended with respect
+ * to the live interval can have their remapped registers freed.
+ */
+ {
+ GLint j;
+ for (j = 0; j < (GLint) activeIntervals.Num; j++) {
+ const struct interval *inv = activeIntervals.Intervals + j;
+ if (inv->End >= live->Start) {
+ /* Stop now. Since the activeInterval list is sorted
+ * we know we don't have to go further.
+ */
+ break;
+ }
+ else {
+ /* Interval 'inv' has expired */
+ /* regNew is read now because 'inv' points into the active
+ * list, whose entries are shifted by remove_interval() below.
+ */
+ const GLint regNew = registerMap[inv->Reg];
+ ASSERT(regNew >= 0);
+
+ if (dbg)
+ printf(" expire interval for reg %u\n", inv->Reg);
+
+ /* remove interval j from active list */
+ remove_interval(&activeIntervals, inv);
+ j--; /* counter-act j++ in for-loop above */
+
+ /* return register regNew to the free pool */
+ if (dbg)
+ printf(" free reg %d\n", regNew);
+ ASSERT(usedRegs[regNew] == GL_TRUE);
+ usedRegs[regNew] = GL_FALSE;
+ }
+ }
+ }
+
+ /* find a free register for this live interval */
+ {
+ const GLint k = alloc_register(usedRegs);
+ if (k < 0) {
+ /* out of registers, give up */
+ /* (the program has not been modified at this point) */
+ return;
+ }
+ registerMap[live->Reg] = k;
+ maxTemp = MAX2(maxTemp, k);
+ if (dbg)
+ printf(" remap register %u -> %d\n", live->Reg, k);
+ }
+
+ /* Insert this live interval into the active list which is sorted
+ * by increasing end points.
+ */
+ insert_interval_by_end(&activeIntervals, live);
+ }
+ }
+
+ /* maxTemp + 1 is the number of new registers in use */
+ if (maxTemp + 1 < (GLint) liveIntervals.Num) {
+ /* OK, we've reduced the number of registers needed.
+ * Scan the program and replace all the old temporary register
+ * indexes with the new indexes.
+ */
+ replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
+
+ prog->NumTemporaries = maxTemp + 1;
+ }
+
+ if (dbg) {
+ printf("Optimize: End live-interval register reallocation\n");
+ printf("Num temp regs before: %u after: %u\n",
+ liveIntervals.Num, maxTemp + 1);
+ _mesa_print_program(prog);
+ }
+ }
+
+
+ /**
+  * Run the optimization passes over the given program to eliminate
+  * unnecessary instructions, temp regs, etc.
+  *
+  * \param ctx  not used by this function
+  * \param program  the program to optimize in place
+  */
+ void
+ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
+ {
+    /* hard-wired switches selecting which optional passes run */
+    const GLboolean removeDeadCode = GL_TRUE;
+    const GLboolean removeExtraMoves = GL_FALSE; /* not tested much yet */
+    const GLboolean consolidateRegisters = GL_FALSE;
+
+    _mesa_remove_extra_move_use(program);
+
+    if (removeDeadCode)
+       _mesa_remove_dead_code(program);
+
+    if (removeExtraMoves)
+       _mesa_remove_extra_moves(program);
+
+    /* exactly one of the two register passes runs */
+    if (consolidateRegisters)
+       _mesa_consolidate_registers(program);
+    else
+       _mesa_reallocate_registers(program);
+ }
--- /dev/null
- for (j = 0; j < 4; j++) {
+ /*
+ * Mesa 3-D graphics library
+ * Version: 7.3
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+ /**
+ * \file prog_parameter.c
+ * Program parameter lists and functions.
+ * \author Brian Paul
+ */
+
+
+ #include "main/glheader.h"
+ #include "main/imports.h"
+ #include "main/macros.h"
+ #include "prog_instruction.h"
+ #include "prog_parameter.h"
+ #include "prog_statevars.h"
+
+
+ /**
+  * Allocate a new parameter list with CALLOC_STRUCT().
+  * \return the new list; callers in this file treat NULL as failure
+  */
+ struct gl_program_parameter_list *
+ _mesa_new_parameter_list(void)
+ {
+    struct gl_program_parameter_list *list =
+       CALLOC_STRUCT(gl_program_parameter_list);
+
+    return list;
+ }
+
+
+ /**
+  * Allocate a new parameter list with room for 'size' parameters.
+  *
+  * \param size  number of parameter slots to pre-allocate; with 0 the
+  *              result is the same as _mesa_new_parameter_list()
+  * \return the new list, or NULL if any allocation failed
+  */
+ struct gl_program_parameter_list *
+ _mesa_new_parameter_list_sized(unsigned size)
+ {
+    struct gl_program_parameter_list *p = _mesa_new_parameter_list();
+
+    if ((p != NULL) && (size != 0)) {
+       p->Size = size;
+
+       /* alloc arrays; passing the count and element size separately lets
+        * calloc() reject a product that would overflow
+        */
+       p->Parameters = (struct gl_program_parameter *)
+          calloc(size, sizeof(struct gl_program_parameter));
+
+       p->ParameterValues = (GLfloat (*)[4])
+          _mesa_align_malloc(size * 4 * sizeof(GLfloat), 16);
+
+       if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
+          free(p->Parameters);
+          /* same NULL guard as _mesa_free_parameter_list(): the aligned
+           * free is only called on a pointer that was really allocated
+           */
+          if (p->ParameterValues)
+             _mesa_align_free(p->ParameterValues);
+          free(p);
+          p = NULL;
+       }
+    }
+
+    return p;
+ }
+
+
+ /**
+  * Release a parameter list: every parameter name, the parameter and
+  * value arrays, and finally the list structure itself.
+  */
+ void
+ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
+ {
+    GLuint remaining = paramList->NumParameters;
+
+    /* names are owned by the list; walk them back to front */
+    while (remaining > 0) {
+       remaining--;
+       if (paramList->Parameters[remaining].Name)
+          free((void *) paramList->Parameters[remaining].Name);
+    }
+
+    free(paramList->Parameters);
+
+    if (paramList->ParameterValues)
+       _mesa_align_free(paramList->ParameterValues);
+
+    free(paramList);
+ }
+
+
+ /**
+  * Add a new parameter to a parameter list.
+  * Note that parameter values are usually 4-element GLfloat vectors.
+  * When size > 4 we'll allocate a sequential block of parameters to
+  * store all the values (in blocks of 4).
+  *
+  * \param paramList  the list to add the parameter to
+  * \param type  type of parameter, such as PROGRAM_CONSTANT or
+  *              PROGRAM_UNIFORM
+  * \param name  the parameter name, will be duplicated/copied!
+  * \param size  number of elements in 'values' vector (1..4, or more)
+  * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
+  * \param values  initial parameter value, or NULL
+  * \param state  state indexes, or NULL
+  * \param flags  stored in the Flags field of every slot that is added
+  * \return  index of new parameter in the list, or -1 if error (out of mem)
+  */
+ GLint
+ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
+                     gl_register_file type, const char *name,
+                     GLuint size, GLenum datatype, const GLfloat *values,
+                     const gl_state_index state[STATE_LENGTH],
+                     GLbitfield flags)
+ {
+    const GLuint oldNum = paramList->NumParameters;
+    const GLuint sz4 = (size + 3) / 4; /* no. of new param slots needed */
+
+    assert(size > 0);
+
+    if (oldNum + sz4 > paramList->Size) {
+       /* Need to grow the parameter list array (alloc some extra) */
+       paramList->Size = paramList->Size + 4 * sz4;
+
+       /* realloc arrays */
+       paramList->Parameters = (struct gl_program_parameter *)
+          _mesa_realloc(paramList->Parameters,
+                        oldNum * sizeof(struct gl_program_parameter),
+                        paramList->Size * sizeof(struct gl_program_parameter));
+
+       paramList->ParameterValues = (GLfloat (*)[4])
+          _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
+                              oldNum * 4 * sizeof(GLfloat),      /* old size */
+                              paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+                              16);
+    }
+
+    if (!paramList->Parameters ||
+        !paramList->ParameterValues) {
+       /* out of memory */
+       paramList->NumParameters = 0;
+       paramList->Size = 0;
+       return -1;
+    }
+    else {
+       GLuint i;
+
+       paramList->NumParameters = oldNum + sz4;
+
+       /* clear the new slots before filling them in */
+       memset(&paramList->Parameters[oldNum], 0,
+              sz4 * sizeof(struct gl_program_parameter));
+
+       for (i = 0; i < sz4; i++) {
+          struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
+          p->Name = name ? _mesa_strdup(name) : NULL;
+          p->Type = type;
+          /* 'size' counts down by 4 per slot, so each slot records how
+           * many elements remain from that slot onwards
+           */
+          p->Size = size;
+          p->DataType = datatype;
+          p->Flags = flags;
+          if (values) {
+             /* NOTE(review): COPY_4V presumably reads four floats per slot
+              * even when fewer remain -- confirm callers supply that many.
+              */
+             COPY_4V(paramList->ParameterValues[oldNum + i], values);
+             values += 4;
+             p->Initialized = GL_TRUE;
+          }
+          else {
+             /* silence valgrind */
+             ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+          }
+          size -= 4;
+       }
+
+       /* state indexes are recorded on the first new slot only */
+       if (state) {
+          for (i = 0; i < STATE_LENGTH; i++)
+             paramList->Parameters[oldNum].StateIndexes[i] = state[i];
+       }
+
+       return (GLint) oldNum;
+    }
+ }
+
+
+ /**
+  * Add a new named program parameter (Ex: NV_fragment_program DEFINE
+  * statement).  The parameter is added as a 4-element PROGRAM_NAMED_PARAM.
+  *
+  * \param paramList  the parameter list
+  * \param name  the parameter's name
+  * \param values  four float values
+  * \return index of the new entry in the parameter list
+  */
+ GLint
+ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
+                           const char *name, const GLfloat values[4])
+ {
+    const GLuint size = 4;
+    const GLint pos = _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM,
+                                          name, size, GL_NONE, values,
+                                          NULL, 0x0);
+    return pos;
+ }
+
+
+ /**
+  * Add a new named constant to the parameter list.
+  * This will be used when the program contains something like this:
+  *    PARAM myVals = { 0, 1, 2, 3 };
+  *
+  * If a parameter with the same name and the same four values is already
+  * in the list, it is reused instead of adding a duplicate.
+  *
+  * \param paramList  the parameter list
+  * \param name  the name for the constant
+  * \param values  four float values
+  * \param size  number of elements, passed on to _mesa_add_parameter()
+  * \return index/position of the new parameter in the parameter list
+  */
+ GLint
+ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
+                          const char *name, const GLfloat values[4],
+                          GLuint size)
+ {
+    /* first check if this is a duplicate constant */
+    GLint pos;
+    for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
+       const GLfloat *pvals = paramList->ParameterValues[pos];
+       const char *pname = paramList->Parameters[pos].Name;
+
+       /* Unnamed parameters have a NULL Name and can never be the same
+        * named constant; skip them instead of handing NULL to strcmp().
+        */
+       if (pname != NULL &&
+           pvals[0] == values[0] &&
+           pvals[1] == values[1] &&
+           pvals[2] == values[2] &&
+           pvals[3] == values[3] &&
+           strcmp(pname, name) == 0) {
+          /* Same name and value is already in the param list - reuse it */
+          return pos;
+       }
+    }
+    /* not found, add new parameter */
+    return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name,
+                               size, GL_NONE, values, NULL, 0x0);
+ }
+
+
+ /**
+  * Add a new unnamed constant to the parameter list.  This will be used
+  * when a fragment/vertex program contains something like this:
+  *    MOV r, { 0, 1, 2, 3 };
+  * If swizzleOut is non-null we first search the parameter list for an
+  * existing instance of the constant which matches with a swizzle, and a
+  * single-element constant may be stored in the spare component of an
+  * existing constant.
+  *
+  * \param paramList  the parameter list
+  * \param values  the float values
+  * \param size  number of values (1..4)
+  * \param swizzleOut  returns swizzle mask for accessing the constant
+  * \return index/position of the new parameter in the parameter list.
+  */
+ GLint
+ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+                            const GLfloat values[4], GLuint size,
+                            GLuint *swizzleOut)
+ {
+    GLint pos;
+    ASSERT(size >= 1);
+    ASSERT(size <= 4);
+
+    if (swizzleOut) {
+       /* an equal constant may already be stored somewhere */
+       if (_mesa_lookup_parameter_constant(paramList, values,
+                                           size, &pos, swizzleOut)) {
+          return pos;
+       }
+
+       /* Look for empty space in an existing constant parameter.  This
+        * only works for single-element constants because we rely on
+        * smearing (i.e. .yyyy or .zzzz).
+        */
+       if (size == 1) {
+          for (pos = 0; pos < (GLint) paramList->NumParameters; pos++) {
+             struct gl_program_parameter *slot = &paramList->Parameters[pos];
+             GLuint comp;
+
+             if (slot->Type != PROGRAM_CONSTANT || slot->Size + size > 4)
+                continue;
+
+             /* ok, found room: the next free component is slot->Size */
+             comp = slot->Size; /* 1, 2 or 3 for Y, Z, W */
+             paramList->ParameterValues[pos][comp] = values[0];
+             slot->Size++;
+             *swizzleOut = MAKE_SWIZZLE4(comp, comp, comp, comp);
+             return pos;
+          }
+       }
+    }
+
+    /* add a new parameter to store this constant */
+    pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
+                              size, GL_NONE, values, NULL, 0x0);
+    if (pos >= 0 && swizzleOut)
+       *swizzleOut = (size == 1) ? SWIZZLE_XXXX : SWIZZLE_NOOP;
+
+    return pos;
+ }
+
+
+ /**
+  * Add a uniform to the parameter list, or return the existing entry if
+  * a uniform with this name is already present.
+  * Note that if the uniform is an array, size may be greater than
+  * what's implied by the datatype.
+  * \param paramList  the parameter list
+  * \param name  uniform's name
+  * \param size  number of floats to allocate
+  * \param datatype  GL_FLOAT_VEC3, GL_FLOAT_MAT4, etc.
+  * \param values  initial values passed to _mesa_add_parameter()
+  * \return index of the uniform in the list
+  */
+ GLint
+ _mesa_add_uniform(struct gl_program_parameter_list *paramList,
+                   const char *name, GLuint size, GLenum datatype,
+                   const GLfloat *values)
+ {
+    const GLint found = _mesa_lookup_parameter_index(paramList, -1, name);
+    ASSERT(datatype != GL_NONE);
+
+    if (found < 0 || paramList->Parameters[found].Type != PROGRAM_UNIFORM) {
+       /* no uniform by that name yet */
+       return _mesa_add_parameter(paramList, PROGRAM_UNIFORM, name,
+                                  size, datatype, values, NULL, 0x0);
+    }
+
+    /* already in list; it must have been declared the same way */
+    ASSERT(paramList->Parameters[found].Size == size);
+    ASSERT(paramList->Parameters[found].DataType == datatype);
+    return found;
+ }
+
+
+ /**
+  * Set the Used flag on every uniform or sampler parameter whose name
+  * equals 'name'.
+  */
+ void
+ _mesa_use_uniform(struct gl_program_parameter_list *paramList,
+                   const char *name)
+ {
+    GLuint n;
+
+    /* Large uniforms may occupy several slots, so the whole list is
+     * scanned rather than stopping at the first match.
+     */
+    for (n = 0; n < paramList->NumParameters; n++) {
+       struct gl_program_parameter *p = &paramList->Parameters[n];
+
+       if (p->Type != PROGRAM_UNIFORM && p->Type != PROGRAM_SAMPLER)
+          continue;
+
+       if (strcmp(p->Name, name) == 0)
+          p->Used = GL_TRUE;
+    }
+ }
+
+
+ /**
+  * Add a sampler to the parameter list.
+  * A new sampler is numbered by counting the samplers already in the
+  * list; that number is stored as the parameter's first value.
+  * \param paramList  the parameter list
+  * \param name  uniform's name
+  * \param datatype  GL_SAMPLER_2D, GL_SAMPLER_2D_RECT_ARB, etc.
+  * \return  sampler index (starting at zero) or -1 if error
+  */
+ GLint
+ _mesa_add_sampler(struct gl_program_parameter_list *paramList,
+                   const char *name, GLenum datatype)
+ {
+    GLint i = _mesa_lookup_parameter_index(paramList, -1, name);
+    if (i >= 0 && paramList->Parameters[i].Type == PROGRAM_SAMPLER) {
+       ASSERT(paramList->Parameters[i].Size == 1);
+       ASSERT(paramList->Parameters[i].DataType == datatype);
+       /* already in list */
+       return (GLint) paramList->ParameterValues[i][0];
+    }
+    else {
+       GLuint j;
+       const GLint size = 1; /* a sampler is basically a texture unit number */
+       GLfloat value[4];
+       GLint numSamplers = 0;
+       for (j = 0; j < paramList->NumParameters; j++) {
+          if (paramList->Parameters[j].Type == PROGRAM_SAMPLER)
+             numSamplers++;
+       }
+       value[0] = (GLfloat) numSamplers;
+       value[1] = value[2] = value[3] = 0.0F;
+       if (_mesa_add_parameter(paramList, PROGRAM_SAMPLER, name,
+                               size, datatype, value, NULL, 0x0) < 0) {
+          /* out of memory: report the failure as documented instead of
+           * returning a sampler number that was never stored
+           */
+          return -1;
+       }
+       return numSamplers;
+    }
+ }
+
+
+ /**
+  * Add parameter representing a varying variable, unless a varying with
+  * that name is already in the list.
+  * \return index of the (existing or new) parameter
+  */
+ GLint
+ _mesa_add_varying(struct gl_program_parameter_list *paramList,
+                   const char *name, GLuint size, GLenum datatype,
+                   GLbitfield flags)
+ {
+    const GLint found = _mesa_lookup_parameter_index(paramList, -1, name);
+
+    if (found < 0 || paramList->Parameters[found].Type != PROGRAM_VARYING) {
+       /*assert(size == 4);*/
+       return _mesa_add_parameter(paramList, PROGRAM_VARYING, name,
+                                  size, datatype, NULL, NULL, flags);
+    }
+
+    /* already in list */
+    return found;
+ }
+
+
+ /**
+  * Add parameter representing a vertex program attribute.
+  * If a parameter with this name already exists, only its first state
+  * index is replaced with the attribute index.
+  * \param paramList  the parameter list
+  * \param name  the attribute's name
+  * \param size  size of attribute (in floats), may be -1 if unknown
+  * \param datatype  passed on to _mesa_add_parameter()
+  * \param attrib  the attribute index, or -1 if unknown
+  * \return index of the parameter in the list
+  */
+ GLint
+ _mesa_add_attribute(struct gl_program_parameter_list *paramList,
+                     const char *name, GLint size, GLenum datatype, GLint attrib)
+ {
+    GLint i = _mesa_lookup_parameter_index(paramList, -1, name);
+    if (i >= 0) {
+       /* replace */
+       if (attrib < 0)
+          attrib = i;
+       paramList->Parameters[i].StateIndexes[0] = attrib;
+    }
+    else {
+       /* add */
+       gl_state_index state[STATE_LENGTH];
+       GLuint k;
+
+       /* _mesa_add_parameter() copies all STATE_LENGTH entries, so none
+        * of them may be left uninitialized
+        */
+       for (k = 0; k < STATE_LENGTH; k++)
+          state[k] = (gl_state_index) 0;
+       state[0] = (gl_state_index) attrib;
+       if (size < 0)
+          size = 4;
+       i = _mesa_add_parameter(paramList, PROGRAM_INPUT, name,
+                               size, datatype, NULL, state, 0x0);
+    }
+    return i;
+ }
+
+
+
+ #if 0 /* not used yet */
+ /**
+ * Returns the number of 4-component registers needed to store a piece
+ * of GL state. For matrices (stateTokens[0] == STATE_MATRIX) this is
+ * stateTokens[4] - stateTokens[3] + 1, which is asserted to be in the
+ * range 1..4; everything else needs just 1 register.
+ */
+ static GLuint
+ sizeof_state_reference(const GLint *stateTokens)
+ {
+ if (stateTokens[0] == STATE_MATRIX) {
+ GLuint rows = stateTokens[4] - stateTokens[3] + 1;
+ assert(rows >= 1);
+ assert(rows <= 4);
+ return rows;
+ }
+ else {
+ return 1;
+ }
+ }
+ #endif
+
+
+ /**
+  * Add a new state reference to the parameter list.
+  * This will be used when the program contains something like this:
+  *    PARAM ambient = state.material.front.ambient;
+  * If a parameter with identical state indexes already exists, its index
+  * is returned and nothing is added.
+  *
+  * \param paramList  the parameter list
+  * \param stateTokens  an array of 5 (STATE_LENGTH) state tokens
+  * \return index of the new parameter.
+  */
+ GLint
+ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
+                           const gl_state_index stateTokens[STATE_LENGTH])
+ {
+    const GLuint size = 4; /* XXX fix */
+    char *stateName;
+    GLint index;
+
+    /* Check if the state reference is already in the list */
+    for (index = 0; index < (GLint) paramList->NumParameters; index++) {
+       const struct gl_program_parameter *p = &paramList->Parameters[index];
+       GLuint t = 0;
+
+       /* count leading tokens that agree; stop at the first mismatch */
+       while (t < STATE_LENGTH && p->StateIndexes[t] == stateTokens[t])
+          t++;
+
+       if (t == STATE_LENGTH) {
+          /* this state reference is already in the parameter list */
+          return index;
+       }
+    }
+
+    stateName = _mesa_program_state_string(stateTokens);
+    index = _mesa_add_parameter(paramList, PROGRAM_STATE_VAR, stateName,
+                                size, GL_NONE,
+                                NULL, (gl_state_index *) stateTokens, 0x0);
+    paramList->StateFlags |= _mesa_program_state_flags(stateTokens);
+
+    /* free name string here since we duplicated it in add_parameter() */
+    free(stateName);
+
+    return index;
+ }
+
+
+ /**
+  * Lookup a parameter value by name in the given parameter list.
+  * \param paramList  the parameter list to search
+  * \param nameLen  length of name, as for _mesa_lookup_parameter_index()
+  * \param name  the name to search for
+  * \return pointer to the float[4] values, or NULL if the name is not found
+  */
+ GLfloat *
+ _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
+                              GLsizei nameLen, const char *name)
+ {
+    const GLint pos = _mesa_lookup_parameter_index(paramList, nameLen, name);
+
+    return (pos < 0) ? NULL : paramList->ParameterValues[pos];
+ }
+
+
+ /**
+  * Given a program parameter name, find its position in the list of parameters.
+  * Parameters without a name are never matched.
+  * \param paramList  the parameter list to search, may be NULL
+  * \param nameLen  length of name (in chars).
+  *                 If length is negative, assume that name is null-terminated.
+  * \param name  the name to search for
+  * \return index of parameter in the list, or -1 if not found.
+  */
+ GLint
+ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
+                              GLsizei nameLen, const char *name)
+ {
+    GLint i;
+
+    if (!paramList)
+       return -1;
+
+    /* Any negative length means "null-terminated", as documented; testing
+     * only for -1 would pass other negative values on to strncmp().
+     */
+    if (nameLen < 0) {
+       /* name is null-terminated */
+       for (i = 0; i < (GLint) paramList->NumParameters; i++) {
+          if (paramList->Parameters[i].Name &&
+              strcmp(paramList->Parameters[i].Name, name) == 0)
+             return i;
+       }
+    }
+    else {
+       /* name is not null-terminated, use nameLen */
+       for (i = 0; i < (GLint) paramList->NumParameters; i++) {
+          /* the prefix must match and the stored name must be no longer */
+          if (paramList->Parameters[i].Name &&
+              strncmp(paramList->Parameters[i].Name, name, nameLen) == 0
+              && strlen(paramList->Parameters[i].Name) == (size_t)nameLen)
+             return i;
+       }
+    }
+    return -1;
+ }
+
+
+ /**
+  * Look for a float vector in the given parameter list.  The float vector
+  * may be of length 1, 2, 3 or 4.  If swizzleOut is non-null, we'll try
+  * swizzling to find a match.
+  * \param list  the parameter list to search, may be NULL
+  * \param v  the float vector to search for
+  * \param vSize  number of element in v
+  * \param posOut  returns the position of the constant, if found,
+  *                otherwise -1
+  * \param swizzleOut  returns a swizzle mask describing location of the
+  *                    vector elements if found.
+  * \return GL_TRUE if found, GL_FALSE if not found
+  */
+ GLboolean
+ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
+                                 const GLfloat v[], GLuint vSize,
+                                 GLint *posOut, GLuint *swizzleOut)
+ {
+    GLuint i;
+
+    assert(vSize >= 1);
+    assert(vSize <= 4);
+
+    if (!list) {
+       /* Nothing to search.  Returning -1 here would read as "found" to
+        * callers testing the GLboolean result, so report not-found the
+        * same way the end of the function does.
+        */
+       *posOut = -1;
+       return GL_FALSE;
+    }
+
+    for (i = 0; i < list->NumParameters; i++) {
+       if (list->Parameters[i].Type == PROGRAM_CONSTANT) {
+          if (!swizzleOut) {
+             /* swizzle not allowed */
+             GLuint j, match = 0;
+             for (j = 0; j < vSize; j++) {
+                if (v[j] == list->ParameterValues[i][j])
+                   match++;
+             }
+             if (match == vSize) {
+                *posOut = i;
+                return GL_TRUE;
+             }
+          }
+          else {
+             /* try matching w/ swizzle */
+             if (vSize == 1) {
+                /* look for v[0] among the components in use */
+                GLuint j;
++               for (j = 0; j < list->Parameters[i].Size; j++) {
+                   if (list->ParameterValues[i][j] == v[0]) {
+                      /* found it */
+                      *posOut = i;
+                      *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
+                      return GL_TRUE;
+                   }
+                }
+             }
+             else if (vSize <= list->Parameters[i].Size) {
+                /* see if we can match this constant (with a swizzle) */
+                GLuint swz[4] = { 0, 0, 0, 0 }; /* never read uninitialized */
+                GLuint match = 0, j, k;
+                for (j = 0; j < vSize; j++) {
+                   if (v[j] == list->ParameterValues[i][j]) {
+                      swz[j] = j;
+                      match++;
+                   }
+                   else {
+                      /* not in place; look for the value in any component */
+                      for (k = 0; k < list->Parameters[i].Size; k++) {
+                         if (v[j] == list->ParameterValues[i][k]) {
+                            swz[j] = k;
+                            match++;
+                            break;
+                         }
+                      }
+                   }
+                }
+                /* smear last value to remaining positions */
+                for (; j < 4; j++)
+                   swz[j] = swz[j-1];
+
+                if (match == vSize) {
+                   *posOut = i;
+                   *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+                   return GL_TRUE;
+                }
+             }
+          }
+       }
+    }
+
+    *posOut = -1;
+    return GL_FALSE;
+ }
+
+
+ /**
+  * Create a copy of a parameter list by re-adding every parameter to a
+  * new list.
+  * \param list  the list to copy
+  * \return the copy, or NULL if out of memory
+  */
+ struct gl_program_parameter_list *
+ _mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
+ {
+    struct gl_program_parameter_list *clone;
+    GLuint i;
+
+    clone = _mesa_new_parameter_list();
+    if (!clone)
+       return NULL;
+
+    /** Not too efficient, but correct */
+    for (i = 0; i < list->NumParameters; i++) {
+       struct gl_program_parameter *p = list->Parameters + i;
+       struct gl_program_parameter *pCopy;
+       /* each source slot is copied as a single slot of at most 4 values */
+       GLuint size = MIN2(p->Size, 4);
+       GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
+                                     list->ParameterValues[i], NULL, 0x0);
+       if (j < 0) {
+          /* Out of memory.  An ASSERT alone would let release builds go
+           * on to write through clone->Parameters[-1].
+           */
+          _mesa_free_parameter_list(clone);
+          return NULL;
+       }
+       pCopy = clone->Parameters + j;
+       pCopy->Used = p->Used;
+       pCopy->Flags = p->Flags;
+       /* copy state indexes */
+       if (p->Type == PROGRAM_STATE_VAR) {
+          GLint k;
+          for (k = 0; k < STATE_LENGTH; k++) {
+             pCopy->StateIndexes[k] = p->StateIndexes[k];
+          }
+       }
+       else {
+          /* restore the size recorded in the source slot */
+          clone->Parameters[j].Size = p->Size;
+       }
+
+    }
+
+    clone->StateFlags = list->StateFlags;
+
+    return clone;
+ }
+
+
+ /**
+  * Return a new parameter list which is listA + listB.
+  * Either list may be NULL; if both are, NULL is returned.
+  */
+ struct gl_program_parameter_list *
+ _mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
+                               const struct gl_program_parameter_list *listB)
+ {
+    struct gl_program_parameter_list *combined;
+    GLuint n;
+
+    if (!listA) {
+       /* only B (or nothing) to copy */
+       return listB ? _mesa_clone_parameter_list(listB) : NULL;
+    }
+
+    combined = _mesa_clone_parameter_list(listA);
+    if (!combined || !listB)
+       return combined;
+
+    /* append B's parameters one by one to the copy of A */
+    for (n = 0; n < listB->NumParameters; n++) {
+       struct gl_program_parameter *src = &listB->Parameters[n];
+       _mesa_add_parameter(combined, src->Type, src->Name, src->Size,
+                           src->DataType,
+                           listB->ParameterValues[n],
+                           src->StateIndexes,
+                           src->Flags);
+    }
+
+    return combined;
+ }
+
+
+
+ /**
+  * Find longest name of all parameters of the given type in list.
+  * \param list  the parameter list, may be NULL
+  * \param type  only parameters of this type are considered
+  * \return length of the longest name, or 0 if there is none
+  */
+ GLuint
+ _mesa_longest_parameter_name(const struct gl_program_parameter_list *list,
+                              gl_register_file type)
+ {
+    GLuint i, maxLen = 0;
+    if (!list)
+       return 0;
+    for (i = 0; i < list->NumParameters; i++) {
+       /* parameters may be unnamed (NULL Name); they have no length */
+       if (list->Parameters[i].Type == type && list->Parameters[i].Name) {
+          GLuint len = strlen(list->Parameters[i].Name);
+          if (len > maxLen)
+             maxLen = len;
+       }
+    }
+    return maxLen;
+ }
+
+
+ /**
+  * Count the number of parameters in the list that match the given type.
+  * \return the count; 0 if list is NULL
+  */
+ GLuint
+ _mesa_num_parameters_of_type(const struct gl_program_parameter_list *list,
+                              gl_register_file type)
+ {
+    GLuint n, total = 0;
+
+    if (!list)
+       return 0;
+
+    for (n = 0; n < list->NumParameters; n++) {
+       if (list->Parameters[n].Type == type)
+          total++;
+    }
+    return total;
+ }
--- /dev/null
-
+ /*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+ #include "main/imports.h"
+ #include "symbol_table.h"
+ #include "hash_table.h"
+
+ /**
+ * One symbol stored in the symbol table.
+ */
+ struct symbol {
+ /**
+ * Link to the next symbol in the table with the same name
+ *
+ * The linked list of symbols with the same name is ordered by scope
+ * from inner-most to outer-most.
+ */
+ struct symbol *next_with_same_name;
+
+
+ /**
+ * Link to the next symbol in the table with the same scope
+ *
+ * The linked list of symbols with the same scope is unordered. Symbols
+ * in this list may have unique names.
+ */
+ struct symbol *next_with_same_scope;
+
+
+ /**
+ * Header information for the list of symbols with the same name.
+ */
+ struct symbol_header *hdr;
+
+
+ /**
+ * Name space of the symbol
+ *
+ * Name spaces are arbitrary user assigned integers. No two symbols can
+ * exist in the same name space at the same scope level.
+ */
+ int name_space;
+
++ /** Scope depth where this symbol was defined. */
++ unsigned depth;
++
+ /**
+ * Arbitrary user supplied data.
+ */
+ void *data;
+ };
+
+
+ /**
+ * Per-name header: holds the name and the list of all symbols that
+ * share it.
+ */
+ struct symbol_header {
+ /** Linkage in list of all headers in a given symbol table. */
+ struct symbol_header *next;
+
+ /** Symbol name. */
+ const char *name;
+
+ /** Linked list of symbols with the same name. */
+ struct symbol *symbols;
+ };
+
+
+ /**
+ * Element of the scope stack.
+ */
+ struct scope_level {
+ /**
+ * Link to the next scope level on the stack. A newly pushed scope links
+ * to the scope that was current before it, i.e. its enclosing scope.
+ */
+ struct scope_level *next;
+
+ /** Linked list of symbols with the same scope. */
+ struct symbol *symbols;
+ };
+
+
+ /**
+ * The symbol table: a hash table of symbols plus a stack of scopes.
+ */
+ struct _mesa_symbol_table {
+ /** Hash table containing all symbols in the symbol table. */
+ struct hash_table *ht;
+
+ /** Top of scope stack. */
+ struct scope_level *current_scope;
+
+ /** List of all symbol headers in the table. */
+ struct symbol_header *hdr;
++
++ /** Current scope depth. Incremented on push, decremented on pop. */
++ unsigned depth;
+ };
+
+
+ /**
+ * Iteration state over the symbols that share one name.
+ */
+ struct _mesa_symbol_table_iterator {
+ /**
+ * Name space of symbols returned by this iterator.
+ */
+ int name_space;
+
+
+ /**
+ * Currently iterated symbol
+ *
+ * The next call to \c _mesa_symbol_table_iterator_get will return this
+ * value. It will also update this value to the value that should be
+ * returned by the next call.
+ */
+ struct symbol *curr;
+ };
+
+
+ /**
+  * Sanity-check the back links of every symbol reachable from the scope stack.
+  *
+  * For each symbol of each scope level, the list of same-named symbols that
+  * hangs off the symbol's header is walked and every entry is asserted to
+  * point back at that header.  The function only asserts; it never modifies
+  * the table.
+  *
+  * \param table  Symbol table to check.
+  */
+ static void
+ check_symbol_table(struct _mesa_symbol_table *table)
+ {
+ #if 1
+     struct scope_level *scope;
+
+     for (scope = table->current_scope; scope != NULL; scope = scope->next) {
+         struct symbol *sym;
+
+         /* The symbols of one scope are chained through next_with_same_scope.
+          * Following next_with_same_name here would only revisit the header
+          * chain of the first symbol and skip every other symbol of the scope.
+          */
+         for (sym = scope->symbols
+              ; sym != NULL
+              ; sym = sym->next_with_same_scope) {
+             const struct symbol_header *const hdr = sym->hdr;
+             struct symbol *sym2;
+
+             for (sym2 = hdr->symbols
+                  ; sym2 != NULL
+                  ; sym2 = sym2->next_with_same_name) {
+                 assert(sym2->hdr == hdr);
+             }
+         }
+     }
+ #endif
+ }
+
+ /**
+  * Remove the top scope level and every symbol that was added to it.
+  *
+  * Each removed symbol must be the head of its header's same-name list; the
+  * header is then pointed at the next symbol with that name.
+  *
+  * \param table  Symbol table whose current scope is discarded.
+  */
+ void
+ _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table)
+ {
+     struct scope_level *const top = table->current_scope;
+     struct symbol *victim;
+     struct symbol *following;
+
+     /* Grab the symbol list before the scope level itself is released. */
+     victim = top->symbols;
+
+     table->current_scope = top->next;
++    table->depth--;
+     free(top);
+
+     for (; victim != NULL; victim = following) {
+         struct symbol_header *const owner = victim->hdr;
+
+         following = victim->next_with_same_scope;
+
+         assert(owner->symbols == victim);
+         owner->symbols = victim->next_with_same_name;
+
+         free(victim);
+     }
+
+     check_symbol_table(table);
+ }
+
+
+ /**
+  * Push a new, empty scope level on top of the scope stack.
+  *
+  * \param table  Symbol table that receives the new scope.
+  */
+ void
+ _mesa_symbol_table_push_scope(struct _mesa_symbol_table *table)
+ {
+     struct scope_level *const level = calloc(1, sizeof(struct scope_level));
+
+     /* The previous top of the stack becomes the new level's successor. */
+     level->next = table->current_scope;
++    table->depth += 1;
+     table->current_scope = level;
+ }
+
+
+ /**
+  * Look up the header stored in the table's hash table under \c name.
+  *
+  * \return Whatever \c hash_table_find returns for \c name, as a header pointer.
+  */
+ static struct symbol_header *
+ find_symbol(struct _mesa_symbol_table *table, const char *name)
+ {
+     struct symbol_header *const found =
+         (struct symbol_header *) hash_table_find(table->ht, name);
+
+     return found;
+ }
+
+
+ /**
+  * Create an iterator over the symbols called \c name.
+  *
+  * The iterator starts at the first symbol in the name's list whose name space
+  * matches; if there is none, the iterator is created empty.
+  *
+  * \param table       Symbol table to search.
+  * \param name_space  Name space to match, or -1 to match any name space.
+  * \param name        Name of the symbols to iterate.
+  *
+  * \return A new iterator to be released with
+  *         \c _mesa_symbol_table_iterator_dtor, or \c NULL if the iterator
+  *         could not be allocated.
+  */
+ struct _mesa_symbol_table_iterator *
+ _mesa_symbol_table_iterator_ctor(struct _mesa_symbol_table *table,
+                                  int name_space, const char *name)
+ {
+     struct _mesa_symbol_table_iterator *iter = calloc(1, sizeof(*iter));
+     struct symbol_header *hdr;
+     struct symbol *sym;
+
+     /* Report allocation failure instead of writing through a NULL pointer. */
+     if (iter == NULL) {
+         return NULL;
+     }
+
+     iter->name_space = name_space;
+
+     hdr = find_symbol(table, name);
+     if (hdr == NULL) {
+         /* Unknown name: calloc already left iter->curr as NULL. */
+         return iter;
+     }
+
+     for (sym = hdr->symbols; sym != NULL; sym = sym->next_with_same_name) {
+         assert(sym->hdr == hdr);
+
+         if ((name_space == -1) || (sym->name_space == name_space)) {
+             iter->curr = sym;
+             break;
+         }
+     }
+
+     return iter;
+ }
+
+
+ /**
+  * Release an iterator.
+  *
+  * \param iter  Iterator to free; may be \c NULL.
+  */
+ void
+ _mesa_symbol_table_iterator_dtor(struct _mesa_symbol_table_iterator *iter)
+ {
+     /* Only the iterator itself is freed, never the symbols it points at. */
+     if (iter != NULL) {
+         free(iter);
+     }
+ }
+
+
+ /**
+  * Get the data of the symbol the iterator currently points at.
+  *
+  * \return The symbol's \c data pointer, or \c NULL when the iterator has no
+  *         current symbol.  The iterator is not advanced.
+  */
+ void *
+ _mesa_symbol_table_iterator_get(struct _mesa_symbol_table_iterator *iter)
+ {
+     if (iter->curr != NULL) {
+         return iter->curr->data;
+     }
+
+     return NULL;
+ }
+
+
+ /**
+  * Advance the iterator to the next symbol with a matching name space.
+  *
+  * \return 1 if the iterator now points at a matching symbol, 0 if it was
+  *         already exhausted or no further matching symbol exists (the
+  *         iterator is then left without a current symbol).
+  */
+ int
+ _mesa_symbol_table_iterator_next(struct _mesa_symbol_table_iterator *iter)
+ {
+     struct symbol_header *owner;
+     struct symbol *candidate;
+
+     if (iter->curr == NULL) {
+         return 0;
+     }
+
+     owner = iter->curr->hdr;
+
+     for (candidate = iter->curr->next_with_same_name
+          ; candidate != NULL
+          ; candidate = candidate->next_with_same_name) {
+         assert(candidate->hdr == owner);
+
+         if ((iter->name_space == -1)
+             || (candidate->name_space == iter->name_space)) {
+             iter->curr = candidate;
+             return 1;
+         }
+     }
+
+     /* Ran off the end of the same-name list. */
+     iter->curr = NULL;
+     return 0;
+ }
+
+
++/**
++ * Determine the scope "distance" of a symbol from the current scope
++ *
++ * The first symbol in the name's list whose name space matches is used.
++ *
++ * \param table       Symbol table to search.
++ * \param name_space  Name space to match, or -1 to match any name space.
++ * \param name        Name of the symbol.
++ *
++ * \return
++ * A non-negative number for the number of scopes between the current scope
++ * and the scope where a symbol was defined.  A value of zero means the current
++ * scope.  A negative number if the symbol does not exist.
++ */
++int
++_mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table,
++                                int name_space, const char *name)
++{
++    struct symbol_header *const hdr = find_symbol(table, name);
++    struct symbol *sym;
++
++    if (hdr != NULL) {
++        for (sym = hdr->symbols; sym != NULL; sym = sym->next_with_same_name) {
++            assert(sym->hdr == hdr);
++
++            if ((name_space == -1) || (sym->name_space == name_space)) {
++                assert(sym->depth <= table->depth);
++
++                /* Subtract the symbol's depth from the current depth, not
++                 * the other way round: the reverse order wraps the unsigned
++                 * difference and yields a negative result for any symbol
++                 * from an outer scope, which callers cannot tell apart from
++                 * "not found".
++                 */
++                return (int) (table->depth - sym->depth);
++            }
++        }
++    }
++
++    return -1;
++}
++
++
+ /**
+  * Find the data attached to a symbol.
+  *
+  * \param table       Symbol table to search.
+  * \param name_space  Name space to match, or -1 to match any name space.
+  * \param name        Name of the symbol.
+  *
+  * \return The \c data of the first matching symbol in the name's list, or
+  *         \c NULL if the name is unknown or no symbol matches.
+  */
+ void *
+ _mesa_symbol_table_find_symbol(struct _mesa_symbol_table *table,
+                                int name_space, const char *name)
+ {
+     struct symbol_header *const owner = find_symbol(table, name);
+     struct symbol *candidate;
+
+     if (owner == NULL) {
+         return NULL;
+     }
+
+     candidate = owner->symbols;
+     while (candidate != NULL) {
+         assert(candidate->hdr == owner);
+
+         if ((name_space == -1) || (candidate->name_space == name_space)) {
+             return candidate->data;
+         }
+
+         candidate = candidate->next_with_same_name;
+     }
+
+     return NULL;
+ }
+
+
+ /**
+  * Add a symbol to the current scope.
+  *
+  * A header for \c name is created on first use.  The \c name pointer is
+  * stored in that header without copying the string.
+  *
+  * \param table        Symbol table to modify.
+  * \param name_space   Name space of the new symbol.
+  * \param name         Name of the new symbol.
+  * \param declaration  Arbitrary data returned later by the lookup functions.
+  *
+  * \return 0 on success.  -1 if the symbol was not added, either because the
+  *         name already exists in this name space at the current scope or
+  *         because memory could not be allocated.
+  */
+ int
+ _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table,
+                               int name_space, const char *name,
+                               void *declaration)
+ {
+     struct symbol_header *hdr;
+     struct symbol *sym;
+
+     check_symbol_table(table);
+
+     /* The lookup does not modify the table, so no re-check is needed
+      * between it and the header creation below.
+      */
+     hdr = find_symbol(table, name);
+
+     if (hdr == NULL) {
+         hdr = calloc(1, sizeof(*hdr));
+         if (hdr == NULL) {
+             /* Out of memory: nothing has been modified yet. */
+             return -1;
+         }
+
+         hdr->name = name;
+
+         hash_table_insert(table->ht, hdr, name);
+         hdr->next = table->hdr;
+         table->hdr = hdr;
+     }
+
+     check_symbol_table(table);
+
++    /* If the symbol already exists in this namespace at this scope, it cannot
++     * be added to the table.
++     */
++    for (sym = hdr->symbols
++         ; (sym != NULL) && (sym->name_space != name_space)
++         ; sym = sym->next_with_same_name) {
++        /* empty */
++    }
++
++    if (sym && (sym->depth == table->depth))
++        return -1;
++
+     sym = calloc(1, sizeof(*sym));
+     if (sym == NULL) {
+         /* Out of memory.  A header created above stays in the table with
+          * its current symbol list, which is the same state a header is left
+          * in after its symbols have been popped.
+          */
+         return -1;
+     }
+
+     sym->next_with_same_name = hdr->symbols;
+     sym->next_with_same_scope = table->current_scope->symbols;
+     sym->hdr = hdr;
+     sym->name_space = name_space;
+     sym->data = declaration;
++    sym->depth = table->depth;
+
+     assert(sym->hdr == hdr);
+
+     /* The new symbol becomes the head of both lists it belongs to. */
+     hdr->symbols = sym;
+     table->current_scope->symbols = sym;
+
+     check_symbol_table(table);
+     return 0;
+ }
+
+
+ /**
+  * Create an empty symbol table with one scope already pushed.
+  *
+  * \return The new table, or \c NULL if the table itself could not be
+  *         allocated.
+  */
+ struct _mesa_symbol_table *
+ _mesa_symbol_table_ctor(void)
+ {
+     struct _mesa_symbol_table *const fresh = calloc(1, sizeof(*fresh));
+
+     if (fresh == NULL) {
+         return fresh;
+     }
+
+     fresh->ht = hash_table_ctor(32, hash_table_string_hash,
+                                 hash_table_string_compare);
+
+     /* Open the outermost scope so symbols can be added right away. */
+     _mesa_symbol_table_push_scope(fresh);
+
+     return fresh;
+ }
+
+
+ /**
+  * Destroy a symbol table.
+  *
+  * Pops every remaining scope, then frees all symbol headers, the hash table
+  * and the table itself.
+  *
+  * \param table  Symbol table to destroy.
+  */
+ void
+ _mesa_symbol_table_dtor(struct _mesa_symbol_table *table)
+ {
+     struct symbol_header *pending;
+
+     while (table->current_scope != NULL) {
+         _mesa_symbol_table_pop_scope(table);
+     }
+
+     pending = table->hdr;
+     while (pending != NULL) {
+         struct symbol_header *const doomed = pending;
+
+         /* Read the link before the header is released. */
+         pending = pending->next;
+         free(doomed);
+     }
+
+     hash_table_dtor(table->ht);
+     free(table);
+ }
--- /dev/null
+ /*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+ #ifndef MESA_SYMBOL_TABLE_H
+ #define MESA_SYMBOL_TABLE_H
+
+ struct _mesa_symbol_table;
+ struct _mesa_symbol_table_iterator;
+
+ /** Enter a new scope; symbols added afterwards belong to it. */
+ void _mesa_symbol_table_push_scope(struct _mesa_symbol_table *table);
+
+ /** Leave the current scope and discard the symbols that were added to it. */
+ void _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table);
+
+ /**
+  * Add a symbol to the current scope.
+  *
+  * Returns 0 on success and -1 if the symbol was not added, for instance
+  * because the name already exists in the name space at the current scope.
+  */
+ int _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *symtab,
+                                   int name_space, const char *name,
+                                   void *declaration);
+
++/**
++ * Report where a symbol was defined relative to the current scope.
++ *
++ * Returns 0 if the matching symbol was defined in the current scope and a
++ * negative value if no matching symbol exists.
++ */
++int _mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table,
++                                    int name_space, const char *name);
++
+ /**
+  * Look up the data attached to a symbol.
+  *
+  * A name space of -1 matches any name space.  Returns NULL if no symbol
+  * matches.
+  */
+ void *_mesa_symbol_table_find_symbol(struct _mesa_symbol_table *symtab,
+                                      int name_space, const char *name);
+
+ /** Create a symbol table with its first scope already pushed. */
+ struct _mesa_symbol_table *_mesa_symbol_table_ctor(void);
+
+ /** Destroy a symbol table together with its scopes, symbols and headers. */
+ void _mesa_symbol_table_dtor(struct _mesa_symbol_table *);
+
+ /**
+  * Create an iterator over the symbols with the given name.
+  *
+  * A name space of -1 matches any name space.
+  */
+ struct _mesa_symbol_table_iterator *_mesa_symbol_table_iterator_ctor(
+     struct _mesa_symbol_table *table, int name_space, const char *name);
+
+ /** Release an iterator. */
+ void _mesa_symbol_table_iterator_dtor(struct _mesa_symbol_table_iterator *);
+
+ /** Data of the iterator's current symbol, or NULL if there is none. */
+ void *_mesa_symbol_table_iterator_get(
+     struct _mesa_symbol_table_iterator *iter);
+
+ /**
+  * Advance to the next matching symbol.
+  *
+  * Returns 1 if the iterator moved to another symbol, 0 otherwise.
+  */
+ int _mesa_symbol_table_iterator_next(
+     struct _mesa_symbol_table_iterator *iter);
+
+ #endif /* MESA_SYMBOL_TABLE_H */
state_tracker/st_program.c \
state_tracker/st_texture.c
- SHADER_SOURCES = \
- shader/arbprogparse.c \
- shader/arbprogram.c \
- shader/atifragshader.c \
- shader/hash_table.c \
- shader/lex.yy.c \
- shader/nvfragparse.c \
- shader/nvprogram.c \
- shader/nvvertparse.c \
- shader/program.c \
- shader/program_parse.tab.c \
- shader/program_parse_extra.c \
- shader/prog_cache.c \
- shader/prog_execute.c \
- shader/prog_instruction.c \
- shader/prog_noise.c \
- shader/prog_optimize.c \
- shader/prog_parameter.c \
- shader/prog_parameter_layout.c \
- shader/prog_print.c \
- shader/prog_statevars.c \
- shader/prog_uniform.c \
- shader/programopt.c \
- shader/symbol_table.c \
- shader/shader_api.c \
- shader/uniforms.c
+ PROGRAM_SOURCES = \
+ program/arbprogparse.c \
+ program/hash_table.c \
+ program/lex.yy.c \
+ program/nvfragparse.c \
+ program/nvvertparse.c \
+ program/program.c \
+ program/program_parse.tab.c \
+ program/program_parse_extra.c \
+ program/prog_cache.c \
+ program/prog_execute.c \
+ program/prog_instruction.c \
+ program/prog_noise.c \
+ program/prog_optimize.c \
+ program/prog_parameter.c \
+ program/prog_parameter_layout.c \
+ program/prog_print.c \
+ program/prog_statevars.c \
+ program/prog_uniform.c \
+ program/programopt.c \
+ program/symbol_table.c
- shader/ir_to_mesa.cpp
+SHADER_CXX_SOURCES = \
++ program/ir_to_mesa.cpp
+
SLANG_SOURCES = \
- shader/slang/slang_builtin.c \
- shader/slang/slang_codegen.c \
- shader/slang/slang_compile.c \
- shader/slang/slang_compile_function.c \
- shader/slang/slang_compile_operation.c \
- shader/slang/slang_compile_struct.c \
- shader/slang/slang_compile_variable.c \
- shader/slang/slang_emit.c \
- shader/slang/slang_ir.c \
- shader/slang/slang_label.c \
- shader/slang/slang_link.c \
- shader/slang/slang_log.c \
- shader/slang/slang_mem.c \
- shader/slang/slang_print.c \
- shader/slang/slang_simplify.c \
- shader/slang/slang_storage.c \
- shader/slang/slang_typeinfo.c \
- shader/slang/slang_vartable.c \
- shader/slang/slang_utility.c
+ slang/slang_builtin.c \
+ slang/slang_codegen.c \
+ slang/slang_compile.c \
+ slang/slang_compile_function.c \
+ slang/slang_compile_operation.c \
+ slang/slang_compile_struct.c \
+ slang/slang_compile_variable.c \
+ slang/slang_emit.c \
+ slang/slang_ir.c \
+ slang/slang_label.c \
+ slang/slang_link.c \
+ slang/slang_log.c \
+ slang/slang_mem.c \
+ slang/slang_print.c \
+ slang/slang_simplify.c \
+ slang/slang_storage.c \
+ slang/slang_typeinfo.c \
+ slang/slang_vartable.c \
+ slang/slang_utility.c
ASM_C_SOURCES = \
x86/common_x86.c \
$(MATH_SOURCES) \
$(VBO_SOURCES) \
$(STATETRACKER_SOURCES) \
- $(SHADER_SOURCES) \
+ $(PROGRAM_SOURCES) \
ppc/common_ppc.c \
- x86/common_x86.c \
- $(SLANG_SOURCES)
+ x86/common_x86.c
+
+MESA_GALLIUM_CXX_SOURCES = \
+ $(SHADER_CXX_SOURCES)
# All the core C sources, for dependency checking
ALL_SOURCES = \