Merge remote-tracking branch 'mesa-public/master' into vulkan

author Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 2 Oct 2015 23:45:48 +0000 (16:45 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 2 Oct 2015 23:52:47 +0000 (16:52 -0700)
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 2 Oct 2015 23:45:48 +0000 (16:45 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 2 Oct 2015 23:52:47 +0000 (16:52 -0700)
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp

index dfb31073f82a9927aa4005ad90ae1f7b1ad55c39..5e8f49d70b0daf0139c26a8d870142f019b36ab7 100644 (file)
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -231,15 +231,17 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
              _mesa_glsl_error(&loc, state, "unsized array index must be constant");
           }
        } else if (array->type->fields.array->is_interface()
-                 && array->variable_referenced()->data.mode == ir_var_uniform
+                 && (array->variable_referenced()->data.mode == ir_var_uniform ||
+                     array->variable_referenced()->data.mode == ir_var_shader_storage)
                   && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
-        /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says:
+        /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says:
           *
-         *     "All indexes used to index a uniform block array must be
-         *     constant integral expressions."
+         *     "All indices used to index a uniform or shader storage block
+         *     array must be constant integral expressions."
           */
-        _mesa_glsl_error(&loc, state,
-                         "uniform block array index must be constant");
+        _mesa_glsl_error(&loc, state, "%s block array index must be constant",
+                          array->variable_referenced()->data.mode
+                          == ir_var_uniform ? "uniform" : "shader storage");
        } else {
          /* whole_variable_referenced can return NULL if the array is a
           * member of a structure.  In this case it is safe to not update
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp

index 351aafc1a72aee29aa274403725ed8b5453da5db..849a8ea29fd126395ca2f7a08bd90f360e05e9f9 100644 (file)
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5766,6 +5766,10 @@ ast_process_structure_or_interface_block(exec_list *instructions,
  
           const struct ast_type_qualifier *const qual =
              & decl_list->type->qualifier;
+
+         if (qual->flags.q.explicit_binding)
+            validate_binding_qualifier(state, &loc, decl_type, qual);
+
           if (qual->flags.q.std140 ||
               qual->flags.q.std430 ||
               qual->flags.q.packed ||
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp

index 15cd45ea708e4adf458536c638eec7293ffdf541..112e5ffdf7ceb1b29d561040bf6659a6f69a7d60 100644 (file)
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -206,7 +206,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
     base_type(GLSL_TYPE_SUBROUTINE),
     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
     sampler_type(0), interface_packing(0),
-   vector_elements(0), matrix_columns(0),
+   vector_elements(1), matrix_columns(1),
     length(0)
  {
     mtx_lock(&glsl_type::mutex);
@@ -214,7 +214,6 @@ glsl_type::glsl_type(const char *subroutine_name) :
     init_ralloc_type_ctx();
     assert(subroutine_name != NULL);
     this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
-   this->vector_elements = 1;
     mtx_unlock(&glsl_type::mutex);
  }
  
@@ -1615,6 +1614,7 @@ glsl_type::std430_base_alignment(bool row_major) const
           base_alignment = MAX2(base_alignment,
                                 field_type->std430_base_alignment(field_row_major));
        }
+      assert(base_alignment > 0);
        return base_alignment;
     }
     assert(!"not reached");
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp

index 47d49c84e65ca96bc667b326c102f41aea950e8b..740b0a46aeec7aaeb55e80fd1ee39e83ab5878c4 100644 (file)
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -1131,15 +1131,15 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
     const unsigned num_data_slots = uniform_size.num_values;
     const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
  
+   /* assign hidden uniforms a slot id */
+   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
+   delete hiddenUniforms;
+
     /* On the outside chance that there were no uniforms, bail out.
      */
     if (num_uniforms == 0)
        return;
  
-   /* assign hidden uniforms a slot id */
-   hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
-   delete hiddenUniforms;
-
     struct gl_uniform_storage *uniforms =
        rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
     union gl_constant_value *data =
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp

index 87c7d4b087b5065f1a1575764e1a237d14cb1780..dbf300ac6915f32a86592d4257ad6104ba00ecd0 100644 (file)
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3133,6 +3133,60 @@ check_explicit_uniform_locations(struct gl_context *ctx,
     delete uniform_map;
  }
  
+static bool
+should_add_buffer_variable(struct gl_shader_program *shProg,
+                           GLenum type, const char *name)
+{
+   bool found_interface = false;
+   const char *block_name = NULL;
+
+   /* These rules only apply to buffer variables. So we return
+    * true for the rest of types.
+    */
+   if (type != GL_BUFFER_VARIABLE)
+      return true;
+
+   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+      block_name = shProg->UniformBlocks[i].Name;
+      if (strncmp(block_name, name, strlen(block_name)) == 0) {
+         found_interface = true;
+         break;
+      }
+   }
+
+   /* We remove the interface name from the buffer variable name,
+    * including the dot that follows it.
+    */
+   if (found_interface)
+      name = name + strlen(block_name) + 1;
+
+   /* From: ARB_program_interface_query extension:
+    *
+    *  "For an active shader storage block member declared as an array, an
+    *   entry will be generated only for the first array element, regardless
+    *   of its type.  For arrays of aggregate types, the enumeration rules are
+    *   applied recursively for the single enumerated array element.
+    */
+   const char *first_dot = strchr(name, '.');
+   const char *first_square_bracket = strchr(name, '[');
+
+   /* The buffer variable is on top level and it is not an array */
+   if (!first_square_bracket) {
+      return true;
+   /* The shader storage block member is a struct, then generate the entry */
+   } else if (first_dot && first_dot < first_square_bracket) {
+      return true;
+   } else {
+      /* Shader storage block member is an array, only generate an entry for the
+       * first array element.
+       */
+      if (strncmp(first_square_bracket, "[0]", 3) == 0)
+         return true;
+   }
+
+   return false;
+}
+
  static bool
  add_program_resource(struct gl_shader_program *prog, GLenum type,
                       const void *data, uint8_t stages)
@@ -3412,6 +3466,10 @@ build_program_resource_list(struct gl_shader_program *shProg)
  
        bool is_shader_storage =  shProg->UniformStorage[i].is_shader_storage;
        GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+      if (!should_add_buffer_variable(shProg, type,
+                                      shProg->UniformStorage[i].name))
+         continue;
+
        if (!add_program_resource(shProg, type,
                                  &shProg->UniformStorage[i], stageref))
           return;
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp

index e581306019b5619488da64101786dc8ff53b01cb..247620e6148737951770174eac5b1775bb499704 100644 (file)
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -754,6 +754,12 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
              add(base_offset,
                  new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
           if (is_write) {
+            /* If the component is not in the writemask, then don't
+             * store any value.
+             */
+            if (!((1 << i) & write_mask))
+               continue;
+
              base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
           } else {
              if (!this->is_shader_storage) {
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp

index 5ee6ff1d854ec35a6eb212ded4117f4ca52795cd..6cd23340e3f32d3e85199ead689e25cff7136ed3 100644 (file)
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -131,9 +131,13 @@ private:
  }; /* end of anonymous namespace */
  
  nir_shader *
-glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
+glsl_to_nir(const struct gl_shader_program *shader_prog,
+            gl_shader_stage stage,
+            const nir_shader_compiler_options *options)
  {
-   nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
+   struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
+
+   nir_shader *shader = nir_shader_create(NULL, stage, options);
  
     nir_visitor v1(shader, sh);
     nir_function_visitor v2(&v1);
@@ -142,8 +146,26 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
  
     nir_lower_outputs_to_temporaries(shader);
  
-   shader->gs.vertices_out = sh->Geom.VerticesOut;
-   shader->gs.invocations = sh->Geom.Invocations;
+   /* TODO: Use _mesa_fls instead */
+   unsigned num_textures = 0;
+   for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
+      if (sh->Program->SamplersUsed & (1 << i))
+         num_textures = i;
+
+   shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name);
+   shader->info.num_textures = num_textures;
+   shader->info.num_ubos = sh->NumUniformBlocks;
+   shader->info.num_abos = shader_prog->NumAtomicBuffers;
+   shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks;
+   shader->info.num_images = sh->NumImages;
+   shader->info.inputs_read = sh->Program->InputsRead;
+   shader->info.outputs_written = sh->Program->OutputsWritten;
+   shader->info.system_values_read = sh->Program->SystemValuesRead;
+   shader->info.uses_texture_gather = sh->Program->UsesGather;
+   shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
+   shader->info.separate_shader = shader_prog->SeparateShader;
+   shader->info.gs.vertices_out = sh->Geom.VerticesOut;
+   shader->info.gs.invocations = sh->Geom.Invocations;
  
     return shader;
  }
@@ -274,6 +296,11 @@ nir_visitor::visit(ir_variable *ir)
           /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
           var->data.location = SYSTEM_VALUE_FRONT_FACE;
           var->data.mode = nir_var_system_value;
+      } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+                 ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
+         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
+         var->data.mode = nir_var_system_value;
        } else {
           var->data.mode = nir_var_shader_in;
        }
@@ -799,7 +826,6 @@ nir_visitor::visit(ir_call *ir)
              instr = nir_intrinsic_instr_create(shader, op);
              instr->src[2] = evaluate_rvalue(offset);
              instr->const_index[0] = 0;
-            dest = &instr->dest;
           } else {
              instr->const_index[0] = const_offset->value.u[0];
           }
diff --git a/src/glsl/nir/glsl_to_nir.h b/src/glsl/nir/glsl_to_nir.h

index 3801e8c55c631c18949495dc20918aa6db74b0b2..29badcda08d6e5bd82f6c3cec3f030a08f9689d6 100644 (file)
--- a/src/glsl/nir/glsl_to_nir.h
+++ b/src/glsl/nir/glsl_to_nir.h
@@ -32,7 +32,8 @@
  extern "C" {
  #endif
  
-nir_shader *glsl_to_nir(struct gl_shader *sh,
+nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog,
+                        gl_shader_stage stage,
                          const nir_shader_compiler_options *options);
  
  #ifdef __cplusplus
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c

index 57fd959c9313b58687bc148b58dae4f29b7e3995..e12da80528113ea47ed8a3595c2c573281d857ea 100644 (file)
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -41,6 +41,7 @@ nir_shader_create(void *mem_ctx,
     exec_list_make_empty(&shader->outputs);
  
     shader->options = options;
+   memset(&shader->info, 0, sizeof(shader->info));
  
     exec_list_make_empty(&shader->functions);
     exec_list_make_empty(&shader->registers);
@@ -54,9 +55,6 @@ nir_shader_create(void *mem_ctx,
  
     shader->stage = stage;
  
-   shader->gs.vertices_out = 0;
-   shader->gs.invocations = 0;
-
     return shader;
  }
  
@@ -1489,10 +1487,11 @@ nir_intrinsic_from_system_value(gl_system_value val)
        return nir_intrinsic_load_work_group_id;
     case SYSTEM_VALUE_NUM_WORK_GROUPS:
        return nir_intrinsic_load_num_work_groups;
+   case SYSTEM_VALUE_PRIMITIVE_ID:
+      return nir_intrinsic_load_primitive_id;
     /* FINISHME: Add tessellation intrinsics.
     case SYSTEM_VALUE_TESS_COORD:
     case SYSTEM_VALUE_VERTICES_IN:
-   case SYSTEM_VALUE_PRIMITIVE_ID:
     case SYSTEM_VALUE_TESS_LEVEL_OUTER:
     case SYSTEM_VALUE_TESS_LEVEL_INNER:
      */
@@ -1529,6 +1528,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
        return SYSTEM_VALUE_NUM_WORK_GROUPS;
     case nir_intrinsic_load_work_group_id:
        return SYSTEM_VALUE_WORK_GROUP_ID;
+   case nir_intrinsic_load_primitive_id:
+      return SYSTEM_VALUE_PRIMITIVE_ID;
     /* FINISHME: Add tessellation intrinsics.
        return SYSTEM_VALUE_TESS_COORD;
        return SYSTEM_VALUE_VERTICES_IN;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h

index 19a4a361a25d287e9e1f69a5fe834d35aa4ce616..107df3a2a67dfd54ea8bd4f79996385d422637b7 100644 (file)
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1480,6 +1480,45 @@ typedef struct nir_shader_compiler_options {
     bool native_integers;
  } nir_shader_compiler_options;
  
+typedef struct nir_shader_info {
+   const char *name;
+
+   /* Number of textures used by this shader */
+   unsigned num_textures;
+   /* Number of uniform buffers used by this shader */
+   unsigned num_ubos;
+   /* Number of atomic buffers used by this shader */
+   unsigned num_abos;
+   /* Number of shader storage buffers used by this shader */
+   unsigned num_ssbos;
+   /* Number of images used by this shader */
+   unsigned num_images;
+
+   /* Which inputs are actually read */
+   uint64_t inputs_read;
+   /* Which outputs are actually written */
+   uint64_t outputs_written;
+   /* Which system values are actually read */
+   uint64_t system_values_read;
+
+   /* Whether or not this shader ever uses textureGather() */
+   bool uses_texture_gather;
+
+   /* Whether or not this shader uses the gl_ClipDistance output */
+   bool uses_clip_distance_out;
+
+   /* Whether or not separate shader objects were used */
+   bool separate_shader;
+
+   struct {
+      /** The maximum number of vertices the geometry shader might write. */
+      unsigned vertices_out;
+
+      /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+      unsigned invocations;
+   } gs;
+} nir_shader_info;
+
  typedef struct nir_shader {
     /** list of uniforms (nir_variable) */
     struct exec_list uniforms;
@@ -1497,6 +1536,9 @@ typedef struct nir_shader {
      */
     const struct nir_shader_compiler_options *options;
  
+   /** Various bits of compile-time information about a given shader */
+   struct nir_shader_info info;
+
     /** list of global variables in the shader (nir_variable) */
     struct exec_list globals;
  
@@ -1519,14 +1561,6 @@ typedef struct nir_shader {
  
     /** The shader stage, such as MESA_SHADER_VERTEX. */
     gl_shader_stage stage;
-
-   struct {
-      /** The maximum number of vertices the geometry shader might write. */
-      unsigned vertices_out;
-
-      /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
-      unsigned invocations;
-   } gs;
  } nir_shader;
  
  #define nir_foreach_overload(shader, overload)                        \
@@ -1861,6 +1895,7 @@ void nir_assign_var_locations(struct exec_list *var_list,
                                int (*type_size)(const struct glsl_type *));
  
  void nir_lower_io(nir_shader *shader,
+                  nir_variable_mode mode,
                    int (*type_size)(const struct glsl_type *));
  void nir_lower_vars_to_ssa(nir_shader *shader);
  
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h

index e02779e157bbbe6002857fd645e00102392850b7..44eff3b4da54acf3c5fdbdd56c445513f95599e6 100644 (file)
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -194,6 +194,7 @@ SYSTEM_VALUE(instance_id, 1, 0)
  SYSTEM_VALUE(sample_id, 1, 0)
  SYSTEM_VALUE(sample_pos, 2, 0)
  SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(primitive_id, 1, 0)
  SYSTEM_VALUE(invocation_id, 1, 0)
  SYSTEM_VALUE(local_invocation_id, 3, 0)
  SYSTEM_VALUE(work_group_id, 3, 0)
diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c

index 2ee4e5c45d6a91bbaa5d764f1ad0f114373d9a65..e0d067885d82bda3c818716df36c50e13c1086c7 100644 (file)
--- a/src/glsl/nir/nir_lower_gs_intrinsics.c
+++ b/src/glsl/nir/nir_lower_gs_intrinsics.c
@@ -76,7 +76,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
     b->cursor = nir_before_instr(&intrin->instr);
     nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
  
-   nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
+   nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out);
  
     /* Create: if (vertex_count < max_vertices) and insert it.
      *
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c

index 9f79c5606ca24eb68f4031375ee41ee0b6108495..f32c09d04a2ae3c59f44bbf5689e81d468acf07c 100644 (file)
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -38,6 +38,7 @@ struct lower_io_state {
     nir_builder builder;
     void *mem_ctx;
     int (*type_size)(const struct glsl_type *type);
+   nir_variable_mode mode;
  };
  
  void
@@ -154,9 +155,17 @@ nir_lower_io_block(nir_block *block, void *void_state)
  
        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  
+      if (intrin->intrinsic != nir_intrinsic_load_var &&
+          intrin->intrinsic != nir_intrinsic_store_var)
+         continue;
+
+      nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+
+      if (state->mode != -1 && state->mode != mode)
+         continue;
+
        switch (intrin->intrinsic) {
        case nir_intrinsic_load_var: {
-         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
           if (mode != nir_var_shader_in && mode != nir_var_uniform)
              continue;
  
@@ -239,12 +248,15 @@ nir_lower_io_block(nir_block *block, void *void_state)
  }
  
  static void
-nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_type *))
+nir_lower_io_impl(nir_function_impl *impl,
+                  nir_variable_mode mode,
+                  int (*type_size)(const struct glsl_type *))
  {
     struct lower_io_state state;
  
     nir_builder_init(&state.builder, impl);
     state.mem_ctx = ralloc_parent(impl);
+   state.mode = mode;
     state.type_size = type_size;
  
     nir_foreach_block(impl, nir_lower_io_block, &state);
@@ -254,10 +266,11 @@ nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_typ
  }
  
  void
-nir_lower_io(nir_shader *shader, int(*type_size)(const struct glsl_type *))
+nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+             int (*type_size)(const struct glsl_type *))
  {
     nir_foreach_overload(shader, overload) {
        if (overload->impl)
-         nir_lower_io_impl(overload->impl, type_size);
+         nir_lower_io_impl(overload->impl, mode, type_size);
     }
  }
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c

index d77bb2f821304e871a924b5c4efc14dca4714e33..21904f81b97828cc9a75aca3b42bf8bd7dd2b130 100644 (file)
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -74,7 +74,7 @@ convert_block(nir_block *block, void *state)
  static bool
  convert_impl(nir_function_impl *impl)
  {
-   bool progress;
+   bool progress = false;
  
     nir_foreach_block(impl, convert_block, &progress);
     nir_metadata_preserve(impl, nir_metadata_block_index |
diff --git a/src/glsl/nir/nir_opt_remove_phis.c b/src/glsl/nir/nir_opt_remove_phis.c

index bf4a67e70eab98ec1e7161b7ef6ab1e5b6b1fbea..5bdf7ef4da742f201be2f501eca9715ec5cb4397 100644 (file)
--- a/src/glsl/nir/nir_opt_remove_phis.c
+++ b/src/glsl/nir/nir_opt_remove_phis.c
@@ -58,6 +58,20 @@ remove_phis_block(nir_block *block, void *state)
  
        nir_foreach_phi_src(phi, src) {
           assert(src->src.is_ssa);
+
+         /* For phi nodes at the beginning of loops, we may encounter some
+          * sources from backedges that point back to the destination of the
+          * same phi, i.e. something like:
+          *
+          * a = phi(a, b, ...)
+          *
+          * We can safely ignore these sources, since if all of the normal
+          * sources point to the same definition, then that definition must
+          * still dominate the phi node, and the phi will still always take
+          * the value of that definition.
+          */
+         if (src->src.ssa == &phi->dest.ssa)
+            continue;
           
           if (def == NULL) {
              def  = src->src.ssa;
@@ -72,6 +86,11 @@ remove_phis_block(nir_block *block, void *state)
        if (!srcs_same)
           continue;
  
+      /* We must have found at least one definition, since there must be at
+       * least one forward edge.
+       */
+      assert(def != NULL);
+
        assert(phi->dest.is_ssa);
        nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
        nir_instr_remove(instr);
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h

index 99acc640496010e9c6dd1c6b2a4dbb0a1c20aace..2a5d2c5bfa75199c13718c317c2b77020dd87919 100644 (file)
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -399,7 +399,7 @@ typedef enum
     /*@{*/
     SYSTEM_VALUE_TESS_COORD,
     SYSTEM_VALUE_VERTICES_IN,    /**< Tessellation vertices in input patch */
-   SYSTEM_VALUE_PRIMITIVE_ID,   /**< (currently not used by GS) */
+   SYSTEM_VALUE_PRIMITIVE_ID,
     SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
     SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
     /*@}*/
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am

index 6e50e096fa2a48a428e425c263194aa730922570..e64955e3b3e237986f2d4327f2a91616bac5bd45 100644 (file)
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -46,6 +46,7 @@ AM_CFLAGS = \
         $(EXTRA_DEFINES_XF86VIDMODE) \
         -D_REENTRANT \
         -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+       -DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
         $(DEFINES) \
         $(LIBDRM_CFLAGS) \
         $(DRI2PROTO_CFLAGS) \
diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c

index eedcd46a15a2de4a99410ce7678cfab0ee326406..8a56385c4bde92a08418d4c88baf622a949ee067 100644 (file)
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...)
     }
  }
  
+#ifndef GL_LIB_NAME
+#define GL_LIB_NAME "libGL.so.1"
+#endif
+
  #ifndef DEFAULT_DRIVER_DIR
  /* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
  #define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
@@ -99,7 +103,7 @@ driOpenDriver(const char *driverName)
     int len;
  
     /* Attempt to make sure libGL symbols will be visible to the driver */
-   glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
+   glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);
  
     libPaths = NULL;
     if (geteuid() == getuid()) {
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c

index 26ff804297fee6ff817f0af1aa2d437b91f1e7c1..93e8db5a367da96263812bea312617324f349828 100644 (file)
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
   */
  _X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
  #if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# if defined(USE_MGL_NAMESPACE)
+   __attribute__ ((alias("mglXGetProcAddressARB")));
+# else
     __attribute__ ((alias("glXGetProcAddressARB")));
+# endif
  #else
  {
     return glXGetProcAddressARB(procName);
diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h

index 90b173fc915670e14fec711d6896a632203f9681..3a9bc8230529d7c701cb4162bc22ec1a57a2a81b 100644 (file)
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -281,11 +281,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name);
  # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
  #else
  # if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
-#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
+ * extra expansion means that the name mangling macros in glx_mangle.h will
+ * apply before stringification, so the alias attribute will have a string like
+ * "mglXFoo" instead of "glXFoo". */
+#  define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
     return_type  real_func  proto_args                                   \
     __attribute__ ((alias( # aliased_func ) ));
+#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+   GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
  #  define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
-   GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
+   GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
  # else
  #  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
     return_type  real_func  proto_args                                   \
diff --git a/src/mapi/mapi_glapi.c b/src/mapi/mapi_glapi.c

index 70605f3dfa1df43b3c882a54b880f8b4fb3f5a55..9f02edb7cd3a9e9ed7d8617e21b8aae1ba3aad97 100644 (file)
--- a/src/mapi/mapi_glapi.c
+++ b/src/mapi/mapi_glapi.c
@@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate)
     const struct mapi_stub *stub;
  
  #ifdef USE_MGL_NAMESPACE
-   if (name)
+   if (name && name[0] == 'm')
        name++;
  #endif
  
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c

index a41fe42338f3891d95c735b2cab51cf3a2dab490..5972a5af0c95db682b09a397837f766bbf11ef08 100644 (file)
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -71,9 +71,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
     char *sample_map_str = rzalloc_size(mem_ctx, 1);
     char *sample_map_expr = rzalloc_size(mem_ctx, 1);
     char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
-   const char *vs_source;
     const char *sampler_array_suffix = "";
-   const char *texcoord_type = "vec2";
     float y_scale;
     enum blit_msaa_shader shader_index;
  
@@ -99,7 +97,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
        shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
                        BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
        sampler_array_suffix = "Array";
-      texcoord_type = "vec3";
     }
  
     if (blit->msaa_shaders[shader_index]) {
@@ -150,28 +147,37 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                            "   const int sample_map[%d] = int[%d](%s);\n",
                            samples, samples, sample_map_str);
  
-   ralloc_asprintf_append(&texel_fetch_macro,
-                          "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
-                          texcoord_type, sample_number);
+   if (target == GL_TEXTURE_2D_MULTISAMPLE) {
+      ralloc_asprintf_append(&texel_fetch_macro,
+                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
+                             sample_number);
+   } else {
+      ralloc_asprintf_append(&texel_fetch_macro,
+                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
+                             sample_number);
+   }
  
-   vs_source = ralloc_asprintf(mem_ctx,
+   static const char vs_source[] =
                                 "#version 130\n"
                                 "in vec2 position;\n"
-                               "in %s textureCoords;\n"
-                               "out %s texCoords;\n"
+                               "in vec3 textureCoords;\n"
+                               "out vec2 texCoords;\n"
+                               "flat out int layer;\n"
                                 "void main()\n"
                                 "{\n"
-                               "   texCoords = textureCoords;\n"
+                               "   texCoords = textureCoords.xy;\n"
+                               "   layer = int(textureCoords.z);\n"
                                 "   gl_Position = vec4(position, 0.0, 1.0);\n"
-                               "}\n",
-                               texcoord_type,
-                               texcoord_type);
+                               "}\n"
+      ;
+
     fs_source = ralloc_asprintf(mem_ctx,
                                 "#version 130\n"
                                 "#extension GL_ARB_texture_multisample : enable\n"
                                 "uniform sampler2DMS%s texSampler;\n"
                                 "uniform float src_width, src_height;\n"
-                               "in %s texCoords;\n"
+                               "in vec2 texCoords;\n"
+                               "flat in int layer;\n"
                                 "out vec4 out_color;\n"
                                 "\n"
                                 "void main()\n"
@@ -212,7 +218,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                 "   out_color = mix(x_0_color, x_1_color, interp.y);\n"
                                 "}\n",
                                 sampler_array_suffix,
-                               texcoord_type,
                                 sample_map_expr,
                                 y_scale,
                                 1.0f / y_scale,
diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h

index 140f617e007c8f7ddf23f142a1c56d5f0f1e53ee..92952cf61b3894510df52af76b4d0bc49d074f8c 100644 (file)
--- a/src/mesa/drivers/dri/i915/i830_context.h
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -42,10 +42,10 @@
  #define I830_UPLOAD_STIPPLE          0x4
  #define I830_UPLOAD_INVARIENT        0x8
  #define I830_UPLOAD_RASTER_RULES     0x10
-#define I830_UPLOAD_TEX(i)           (0x10<<(i))
-#define I830_UPLOAD_TEXBLEND(i)      (0x100<<(i))
-#define I830_UPLOAD_TEX_ALL          (0x0f0)
-#define I830_UPLOAD_TEXBLEND_ALL     (0xf00)
+#define I830_UPLOAD_TEX(i)           (0x0100<<(i))
+#define I830_UPLOAD_TEXBLEND(i)      (0x1000<<(i))
+#define I830_UPLOAD_TEX_ALL          (0x0f00)
+#define I830_UPLOAD_TEXBLEND_ALL     (0xf000)
  
  /* State structure offsets - these will probably disappear.
   */
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h

index fa58ecb816567e5b1647b68f0fc499212e192b3e..d8f592bcb9f185cc5e926e4041aff801657c05ea 100644 (file)
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -115,6 +115,8 @@ enum {
     I915_RASTER_RULES_SETUP_SIZE,
  };
  
+#define I915_TEX_UNITS 8
+
  #define I915_MAX_CONSTANT      32
  #define I915_CONSTANT_SIZE     (2+(4*I915_MAX_CONSTANT))
  
@@ -194,7 +196,8 @@ struct i915_fragment_program
  
     /* Helpers for i915_fragprog.c:
      */
-   GLuint wpos_tex;
+   uint8_t texcoord_mapping[I915_TEX_UNITS];
+   uint8_t wpos_tex;
     bool depth_written;
  
     struct
@@ -205,15 +208,6 @@ struct i915_fragment_program
     GLuint nr_params;
  };
  
-
-
-
-
-
-
-#define I915_TEX_UNITS 8
-
-
  struct i915_hw_state
  {
     GLuint Ctx[I915_CTX_SETUP_SIZE];
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c

index 03c32e56d8274b5e16af041fe32fc9576b96dac0..1a5943c87fba4dc5ae64f62db266934d3638b944 100644 (file)
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0,
     -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
  };
  
+/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
+#define TEXCOORD_TEX (0<<7)
+#define TEXCOORD_VAR (1<<7)
+
+static unsigned
+get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
+{
+   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (p->texcoord_mapping[i] == texcoord)
+         return i;
+   }
+
+   /* blah */
+   return p->ctx->Const.MaxTextureCoordUnits - 1;
+}
+
  /**
   * Retrieve a ureg for the given source register.  Will emit
   * constants, apply swizzling and negation as needed.
@@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p,
             const struct gl_fragment_program *program)
  {
     GLuint src;
+   unsigned unit;
  
     switch (source->File) {
  
@@ -119,8 +136,10 @@ src_vector(struct i915_fragment_program *p,
        case VARYING_SLOT_TEX5:
        case VARYING_SLOT_TEX6:
        case VARYING_SLOT_TEX7:
+         unit = get_texcoord_mapping(p, (source->Index -
+                                         VARYING_SLOT_TEX0) | TEXCOORD_TEX);
           src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
+                              T_TEX0 + unit,
                                D0_CHANNEL_ALL);
          break;
  
@@ -132,8 +151,10 @@ src_vector(struct i915_fragment_program *p,
        case VARYING_SLOT_VAR0 + 5:
        case VARYING_SLOT_VAR0 + 6:
        case VARYING_SLOT_VAR0 + 7:
+         unit = get_texcoord_mapping(p, (source->Index -
+                                         VARYING_SLOT_VAR0) | TEXCOORD_VAR);
           src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
+                              T_TEX0 + unit,
                                D0_CHANNEL_ALL);
           break;
  
@@ -1176,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p)
     }
  }
  
+static void
+check_texcoord_mapping(struct i915_fragment_program *p)
+{
+   GLbitfield64 inputs = p->FragProg.Base.InputsRead;
+   unsigned unit = 0;
+
+   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (inputs & VARYING_BIT_TEX(i)) {
+         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+            unit++;
+            break;
+         }
+         p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
+      }
+      if (inputs & VARYING_BIT_VAR(i)) {
+         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+            unit++;
+            break;
+         }
+         p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
+      }
+   }
+
+   if (unit > p->ctx->Const.MaxTextureCoordUnits)
+      i915_program_error(p, "Too many texcoord units");
+}
  
  static void
  check_wpos(struct i915_fragment_program *p)
  {
     GLbitfield64 inputs = p->FragProg.Base.InputsRead;
     GLint i;
+   unsigned unit = 0;
  
     p->wpos_tex = -1;
  
+   if ((inputs & VARYING_BIT_POS) == 0)
+      return;
+
     for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
-      if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
-         continue;
-      else if (inputs & VARYING_BIT_POS) {
-         p->wpos_tex = i;
-         inputs &= ~VARYING_BIT_POS;
-      }
+      unit += !!(inputs & VARYING_BIT_TEX(i));
+      unit += !!(inputs & VARYING_BIT_VAR(i));
     }
  
-   if (inputs & VARYING_BIT_POS) {
+   if (unit < p->ctx->Const.MaxTextureCoordUnits)
+      p->wpos_tex = unit;
+   else
        i915_program_error(p, "No free texcoord for wpos value");
-   }
  }
  
  
@@ -1212,6 +1260,7 @@ translate_program(struct i915_fragment_program *p)
     }
  
     i915_init_program(i915, p);
+   check_texcoord_mapping(p);
     check_wpos(p);
     upload_program(p);
     fixup_depth_write(p);
@@ -1420,22 +1469,24 @@ i915ValidateFragmentProgram(struct i915_context *i915)
  
     for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
        if (inputsRead & VARYING_BIT_TEX(i)) {
+         int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
           int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
  
-         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
  
           EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
        }
-      else if (inputsRead & VARYING_BIT_VAR(i)) {
+      if (inputsRead & VARYING_BIT_VAR(i)) {
+         int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
           int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
  
-         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
  
           EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
        }
-      else if (i == p->wpos_tex) {
+      if (i == p->wpos_tex) {
          int wpos_size = 4 * sizeof(float);
           /* If WPOS is required, duplicate the XYZ position data in an
            * unused texture coordinate:
diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c

index 6701366637726394d383a60c60877023c1bf1026..12cc7e3a71b42d4dec07e0b50931c04ea606139d 100644 (file)
--- a/src/mesa/drivers/dri/i915/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -658,6 +658,11 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
  {
     struct intel_context *intel = intel_context(ctx);
  
+   /* Sync up the state of window system buffers.  We need to do this before
+    * we go looking for the buffers.
+    */
+   intel_prepare_render(intel);
+
     if (mask & GL_COLOR_BUFFER_BIT) {
        GLint i;
        struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources

index cc3ecaf7a8b2021f9c1d6e80dde3c5b4da421e80..eb8196d4845f6ed55dd9181e7026acd30c3db00b 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -86,6 +86,7 @@ i965_FILES = \
         brw_nir.h \
         brw_nir.c \
         brw_nir_analyze_boolean_resolves.c \
+       brw_nir_uniforms.cpp \
         brw_object_purgeable.c \
         brw_packed_float.c \
         brw_performance_monitor.c \
@@ -133,7 +134,6 @@ i965_FILES = \
         brw_vec4_surface_builder.cpp \
         brw_vec4_surface_builder.h \
         brw_vec4_visitor.cpp \
-       brw_vec4_vp.cpp \
         brw_vec4_vs_visitor.cpp \
         brw_vs.c \
         brw_vs.h \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c

index 9dcdaf5cd4fe6076ed3ef0b9ed934e71127c26f3..4d499295730288eb9e407379d45626ea93dc1eb0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1436,7 +1436,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
                buffer->cpp, buffer->pitch);
     }
  
-   intel_miptree_release(&rb->mt);
     bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                            buffer->name);
     if (!bo) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index 2479182e37049ccafebf061bfcf5de013dba8604..d921a9bb7dd5cad55cc4b352c84ff70c4efeb5cb 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -340,9 +340,6 @@ struct brw_shader {
     bool compiled_once;
  };
  
-/* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix brw_stage_prog_data_compare().
- */
  struct brw_stage_prog_data {
     struct {
        /** size of our binding table. */
@@ -384,18 +381,11 @@ struct brw_stage_prog_data {
  
     /* Pointers to tracked values (only valid once
      * _mesa_load_state_parameters has been called at runtime).
-    *
-    * These must be the last fields of the struct (see
-    * brw_stage_prog_data_compare()).
      */
     const gl_constant_value **param;
     const gl_constant_value **pull_param;
  
-   /**
-    * Image metadata passed to the shader as uniforms.  This is deliberately
-    * ignored by brw_stage_prog_data_compare() because its contents don't have
-    * any influence on program compilation.
-    */
+   /** Image metadata passed to the shader as uniforms. */
     struct brw_image_param *image_param;
  };
  
@@ -449,9 +439,6 @@ struct brw_image_param {
   * there can be many of these, each in a different GL state
   * corresponding to a different brw_wm_prog_key struct, with different
   * compiled programs.
- *
- * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
- * struct!
   */
  struct brw_wm_prog_data {
     struct brw_stage_prog_data base;
@@ -495,9 +482,6 @@ struct brw_wm_prog_data {
     int urb_setup[VARYING_SLOT_MAX];
  };
  
-/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
  struct brw_cs_prog_data {
     struct brw_stage_prog_data base;
  
@@ -698,9 +682,6 @@ enum shader_dispatch_mode {
     DISPATCH_MODE_SIMD8 = 3,
  };
  
-/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
  struct brw_vue_prog_data {
     struct brw_stage_prog_data base;
     struct brw_vue_map vue_map;
@@ -718,9 +699,6 @@ struct brw_vue_prog_data {
  };
  
  
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
  struct brw_vs_prog_data {
     struct brw_vue_prog_data base;
  
@@ -780,9 +758,6 @@ struct brw_vs_prog_data {
  
  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
  
-/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
  struct brw_gs_prog_data
  {
     struct brw_vue_prog_data base;
@@ -875,7 +850,6 @@ struct brw_cache_item {
  };
  
  
-typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
  typedef void (*cache_aux_free_func)(const void *aux);
  
  struct brw_cache {
@@ -888,12 +862,6 @@ struct brw_cache {
     uint32_t next_offset;
     bool bo_used_by_gpu;
  
-   /**
-    * Optional functions used in determining whether the prog_data for a new
-    * cache item matches an existing cache item (in case there's relevant data
-    * outside of the prog_data).  If NULL, a plain memcmp is done.
-    */
-   cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
     /** Optional functions for freeing other pointers attached to a prog_data. */
     cache_aux_free_func aux_free[BRW_MAX_CACHE];
  };
@@ -1554,7 +1522,7 @@ struct brw_context
  
     int num_atoms[BRW_NUM_PIPELINES];
     const struct brw_tracked_state render_atoms[60];
-   const struct brw_tracked_state compute_atoms[7];
+   const struct brw_tracked_state compute_atoms[8];
  
     /* If (INTEL_DEBUG & DEBUG_BATCH) */
     struct {
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c

index cb3fae66ec35b6f384a92ccaae5887ff14834314..6b64030a8689e7a00ffa369b036549176be53565 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -30,26 +30,23 @@
  #include "intel_mipmap_tree.h"
  #include "brw_state.h"
  #include "intel_batchbuffer.h"
+#include "brw_nir.h"
  
-bool
-brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+static void
+assign_cs_binding_table_offsets(const struct brw_device_info *devinfo,
+                                const struct gl_shader_program *shader_prog,
+                                const struct gl_program *prog,
+                                struct brw_cs_prog_data *prog_data)
  {
-   const struct brw_cs_prog_data *a =
-      (const struct brw_cs_prog_data *)in_a;
-   const struct brw_cs_prog_data *b =
-      (const struct brw_cs_prog_data *)in_b;
-
-   /* Compare the base structure. */
-   if (!brw_stage_prog_data_compare(&a->base, &b->base))
-      return false;
+   uint32_t next_binding_table_offset = 0;
  
-   /* Compare the rest of the structure. */
-   const unsigned offset = sizeof(struct brw_stage_prog_data);
-   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
-              sizeof(struct brw_cs_prog_data) - offset))
-      return false;
+   /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+   prog_data->binding_table.work_groups_start = next_binding_table_offset;
+   next_binding_table_offset++;
  
-   return true;
+   brw_assign_common_binding_table_offsets(MESA_SHADER_COMPUTE, devinfo,
+                                           shader_prog, prog, &prog_data->base,
+                                           next_binding_table_offset);
  }
  
  static bool
@@ -72,12 +69,14 @@ brw_codegen_cs_prog(struct brw_context *brw,
  
     memset(&prog_data, 0, sizeof(prog_data));
  
+   assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+                                   &cp->program.Base, &prog_data);
+
     /* Allocate the references to the uniforms that will end up in the
      * prog_data associated with the compiled program, and which will be freed
      * by the state cache.
      */
-   int param_count = cs->base.num_uniform_components +
-                     cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+   int param_count = cp->program.Base.nir->num_uniforms;
  
     /* The backend also sometimes adds params for texture size. */
     param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -90,6 +89,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
     prog_data.base.nr_params = param_count;
     prog_data.base.nr_image_params = cs->base.NumImages;
  
+   brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base,
+                               &prog_data.base, true);
+
     if (unlikely(brw->perf_debug)) {
        start_busy = (brw->batch.last_bo &&
                      drm_intel_bo_busy(brw->batch.last_bo));
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h

index 746fb05166c6ae58d204862100b66f83d2949040..0c0ed2bc909897a264bd8b643cfcdc3760bd8af6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -36,8 +36,6 @@ struct brw_cs_prog_key {
  extern "C" {
  #endif
  
-bool brw_cs_prog_data_compare(const void *a, const void *b);
-
  void
  brw_upload_cs_prog(struct brw_context *brw);
  
@@ -51,8 +49,7 @@ brw_cs_emit(struct brw_context *brw,
              unsigned *final_assembly_size);
  
  unsigned
-brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
-                                    unsigned dispatch_width);
+brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
  
  #ifdef __cplusplus
  }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index e620301fde7e16f89bfe9995370092cb64649a17..0a6b23284d970fa3a897d161432286a676e42761 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -949,20 +949,6 @@ fs_visitor::import_uniforms(fs_visitor *v)
     this->param_size = v->param_size;
  }
  
-void
-fs_visitor::setup_vec4_uniform_value(unsigned param_offset,
-                                     const gl_constant_value *values,
-                                     unsigned n)
-{
-   static const gl_constant_value zero = { 0 };
-
-   for (unsigned i = 0; i < n; ++i)
-      stage_prog_data->param[param_offset + i] = &values[i];
-
-   for (unsigned i = n; i < 4; ++i)
-      stage_prog_data->param[param_offset + i] = &zero;
-}
-
  fs_reg *
  fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
                                           bool origin_upper_left)
@@ -1416,7 +1402,7 @@ fs_visitor::calculate_urb_setup()
     int urb_next = 0;
     /* Figure out where each of the incoming setup attributes lands. */
     if (devinfo->gen >= 6) {
-      if (_mesa_bitcount_64(prog->InputsRead &
+      if (_mesa_bitcount_64(nir->info.inputs_read &
                              BRW_FS_VARYING_INPUT_MASK) <= 16) {
           /* The SF/SBE pipeline stage can do arbitrary rearrangement of the
            * first 16 varying inputs, so we can put them wherever we want.
@@ -1428,7 +1414,7 @@ fs_visitor::calculate_urb_setup()
            * a different vertex (or geometry) shader.
            */
           for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
-            if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+            if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
                  BITFIELD64_BIT(i)) {
                 prog_data->urb_setup[i] = urb_next++;
              }
@@ -1442,7 +1428,7 @@ fs_visitor::calculate_urb_setup()
           struct brw_vue_map prev_stage_vue_map;
           brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                               key->input_slots_valid,
-                             shader_prog->SeparateShader);
+                             nir->info.separate_shader);
           int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
           assert(prev_stage_vue_map.num_slots <= first_slot + 32);
           for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@@ -1452,7 +1438,7 @@ fs_visitor::calculate_urb_setup()
               * unused.
               */
              if (varying != BRW_VARYING_SLOT_COUNT &&
-                (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+                (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
                   BITFIELD64_BIT(varying))) {
                 prog_data->urb_setup[varying] = slot - first_slot;
              }
@@ -1485,7 +1471,7 @@ fs_visitor::calculate_urb_setup()
         *
         * See compile_sf_prog() for more info.
         */
-      if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
+      if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
           prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
     }
  
@@ -4537,7 +4523,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
           fprintf(file, "***m%d***", inst->src[i].reg);
           break;
        case ATTR:
-         fprintf(file, "attr%d", inst->src[i].reg + inst->src[i].reg_offset);
+         fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
           break;
        case UNIFORM:
           fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
@@ -4668,7 +4654,7 @@ void
  fs_visitor::setup_payload_gen6()
  {
     bool uses_depth =
-      (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+      (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
     unsigned barycentric_interp_modes =
        (stage == MESA_SHADER_FRAGMENT) ?
        ((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
@@ -4727,7 +4713,7 @@ fs_visitor::setup_payload_gen6()
     }
  
     /* R32: MSAA input coverage mask */
-   if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+   if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
        assert(devinfo->gen >= 7);
        payload.sample_mask_in_reg = payload.num_regs;
        payload.num_regs++;
@@ -4740,7 +4726,7 @@ fs_visitor::setup_payload_gen6()
     /* R34-: bary for 32-pixel. */
     /* R58-59: interp W for 32-pixel. */
  
-   if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+   if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
        source_depth_to_render_target = true;
     }
  }
@@ -4759,9 +4745,9 @@ fs_visitor::setup_cs_payload()
  
     payload.num_regs = 1;
  
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+   if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
        const unsigned local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+         brw_cs_prog_local_id_payload_dwords(dispatch_width);
        assert((local_id_dwords & 0x7) == 0);
        const unsigned local_id_regs = local_id_dwords / 8;
        payload.local_invocation_id_reg = payload.num_regs;
@@ -4769,37 +4755,6 @@ fs_visitor::setup_cs_payload()
     }
  }
  
-void
-fs_visitor::assign_fs_binding_table_offsets()
-{
-   assert(stage == MESA_SHADER_FRAGMENT);
-   brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   uint32_t next_binding_table_offset = 0;
-
-   /* If there are no color regions, we still perform an FB write to a null
-    * renderbuffer, which we place at surface index 0.
-    */
-   prog_data->binding_table.render_target_start = next_binding_table_offset;
-   next_binding_table_offset += MAX2(key->nr_color_regions, 1);
-
-   assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
-void
-fs_visitor::assign_cs_binding_table_offsets()
-{
-   assert(stage == MESA_SHADER_COMPUTE);
-   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
-   uint32_t next_binding_table_offset = 0;
-
-   /* May not be used if the gl_NumWorkGroups variable is not accessed. */
-   prog_data->binding_table.work_groups_start = next_binding_table_offset;
-   next_binding_table_offset++;
-
-   assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
  void
  fs_visitor::calculate_register_pressure()
  {
@@ -4851,8 +4806,8 @@ fs_visitor::optimize()
                                                                          \
        if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) {   \
           char filename[64];                                             \
-         snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass,              \
-                  stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+         snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass,              \
+                  stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
                                                                          \
           backend_shader::dump_instructions(filename);                   \
        }                                                                 \
@@ -4865,9 +4820,8 @@ fs_visitor::optimize()
  
     if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
        char filename[64];
-      snprintf(filename, 64, "%s%d-%04d-00-start",
-               stage_abbrev, dispatch_width,
-               shader_prog ? shader_prog->Name : 0);
+      snprintf(filename, 64, "%s%d-%s-00-start",
+               stage_abbrev, dispatch_width, nir->info.name);
  
        backend_shader::dump_instructions(filename);
     }
@@ -5013,8 +4967,6 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
  {
     assert(stage == MESA_SHADER_VERTEX);
  
-   if (prog_data->map_entries == NULL)
-      assign_common_binding_table_offsets(0);
     setup_vs_payload();
  
     if (shader_time_index >= 0)
@@ -5053,11 +5005,6 @@ fs_visitor::run_fs(bool do_rep_send)
  
     assert(stage == MESA_SHADER_FRAGMENT);
  
-   sanity_param_count = prog->Parameters->NumParameters;
-
-   if (prog_data->map_entries == NULL)
-      assign_fs_binding_table_offsets();
-
     if (devinfo->gen >= 6)
        setup_payload_gen6();
     else
@@ -5073,7 +5020,7 @@ fs_visitor::run_fs(bool do_rep_send)
           emit_shader_time_begin();
  
        calculate_urb_setup();
-      if (prog->InputsRead > 0) {
+      if (nir->info.inputs_read > 0) {
           if (devinfo->gen < 6)
              emit_interpolation_setup_gen4();
           else
@@ -5133,11 +5080,6 @@ bool
  fs_visitor::run_cs()
  {
     assert(stage == MESA_SHADER_COMPUTE);
-   assert(shader);
-
-   sanity_param_count = prog->Parameters->NumParameters;
-
-   assign_cs_binding_table_offsets();
  
     setup_cs_payload();
  
@@ -5166,13 +5108,6 @@ fs_visitor::run_cs()
     if (failed)
        return false;
  
-   /* If any state parameters were appended, then ParameterValues could have
-    * been realloced, in which case the driver uniform storage set up by
-    * _mesa_associate_uniform_storage() would point to freed memory.  Make
-    * sure that didn't happen.
-    */
-   assert(sanity_param_count == prog->Parameters->NumParameters);
-
     return !failed;
  }
  
@@ -5200,9 +5135,8 @@ brw_wm_fs_emit(struct brw_context *brw,
  
     /* Now the main event: Visit the shader IR and generate our FS IR for it.
      */
-   fs_visitor v(brw->intelScreen->compiler, brw,
-                mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                prog, &fp->Base, 8, st_index8);
+   fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
+                &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8);
     if (!v.run_fs(false /* do_rep_send */)) {
        if (prog) {
           prog->LinkStatus = false;
@@ -5216,9 +5150,8 @@ brw_wm_fs_emit(struct brw_context *brw,
     }
  
     cfg_t *simd16_cfg = NULL;
-   fs_visitor v2(brw->intelScreen->compiler, brw,
-                 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                 prog, &fp->Base, 16, st_index16);
+   fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
+                 &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16);
     if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
        if (!v.simd16_unsupported) {
           /* Try a SIMD16 compile */
@@ -5332,9 +5265,8 @@ brw_cs_emit(struct brw_context *brw,
  
     /* Now the main event: Visit the shader IR and generate our CS IR for it.
      */
-   fs_visitor v8(brw->intelScreen->compiler, brw,
-                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                 &cp->Base, 8, st_index);
+   fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
+                 &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index);
     if (!v8.run_cs()) {
        fail_msg = v8.fail_msg;
     } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
@@ -5342,9 +5274,8 @@ brw_cs_emit(struct brw_context *brw,
        prog_data->simd_size = 8;
     }
  
-   fs_visitor v16(brw->intelScreen->compiler, brw,
-                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                  &cp->Base, 16, st_index);
+   fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
+                  &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index);
     if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
         !fail_msg && !v8.simd16_unsupported &&
         local_workgroup_size <= 16 * brw->max_cs_threads) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index a8b6726b9f0c39ebca4fecc9178cf05f4f5afeab..e8b511f9ce67c51472be2dab3709c3bd6ace252a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -93,11 +93,10 @@ class fs_visitor : public backend_shader
  public:
     fs_visitor(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
-              gl_shader_stage stage,
                const void *key,
                struct brw_stage_prog_data *prog_data,
-              struct gl_shader_program *shader_prog,
                struct gl_program *prog,
+              nir_shader *shader,
                unsigned dispatch_width,
                int shader_time_index);
  
@@ -128,8 +127,6 @@ public:
     bool run_cs();
     void optimize();
     void allocate_registers();
-   void assign_fs_binding_table_offsets();
-   void assign_cs_binding_table_offsets();
     void setup_payload_gen4();
     void setup_payload_gen6();
     void setup_vs_payload();
@@ -207,7 +204,7 @@ public:
     void emit_interpolation_setup_gen6();
     void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
     fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
-                           bool is_rect, uint32_t sampler, int texunit);
+                           bool is_rect, uint32_t sampler);
     void emit_texture(ir_texture_opcode op,
                       const glsl_type *dest_type,
                       fs_reg coordinate, int components,
@@ -220,8 +217,7 @@ public:
                       bool is_cube_array,
                       bool is_rect,
                       uint32_t sampler,
-                     fs_reg sampler_reg,
-                     int texunit);
+                     fs_reg sampler_reg);
     fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
                           const fs_reg &sampler);
     void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
@@ -238,12 +234,10 @@ public:
                     uint32_t spill_offset, int count);
  
     void emit_nir_code();
-   void nir_setup_inputs(nir_shader *shader);
-   void nir_setup_outputs(nir_shader *shader);
-   void nir_setup_uniforms(nir_shader *shader);
-   void nir_setup_uniform(nir_variable *var);
-   void nir_setup_builtin_uniform(nir_variable *var);
-   void nir_emit_system_values(nir_shader *shader);
+   void nir_setup_inputs();
+   void nir_setup_outputs();
+   void nir_setup_uniforms();
+   void nir_emit_system_values();
     void nir_emit_impl(nir_function_impl *impl);
     void nir_emit_cf_list(exec_list *list);
     void nir_emit_if(nir_if *if_stmt);
@@ -294,10 +288,6 @@ public:
  
     struct brw_reg interp_reg(int location, int channel);
  
-   virtual void setup_vec4_uniform_value(unsigned param_offset,
-                                         const gl_constant_value *values,
-                                         unsigned n);
-
     int implied_mrf_writes(fs_inst *inst);
  
     virtual void dump_instructions();
@@ -309,7 +299,7 @@ public:
     const struct brw_sampler_prog_key_data *key_tex;
  
     struct brw_stage_prog_data *prog_data;
-   unsigned int sanity_param_count;
+   struct gl_program *prog;
  
     int *param_size;
  
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index 61d0c896b8ea50e9b77076fdf7140ff713106568..8a03597c72bb0cf8e9acb525dd1eb67b84709d82 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -37,16 +37,13 @@ using namespace brw::surface_access;
  void
  fs_visitor::emit_nir_code()
  {
-   nir_shader *nir = prog->nir;
-
     /* emit the arrays used for inputs and outputs - load/store intrinsics will
      * be converted to reads/writes of these arrays
      */
-   nir_setup_inputs(nir);
-   nir_setup_outputs(nir);
-   uniforms = nir->num_uniforms;
-   //nir_setup_uniforms(nir);
-   nir_emit_system_values(nir);
+   nir_setup_inputs();
+   nir_setup_outputs();
+   nir_setup_uniforms();
+   nir_emit_system_values();
  
     /* get the main function and emit it */
     nir_foreach_overload(nir, overload) {
@@ -57,11 +54,11 @@ fs_visitor::emit_nir_code()
  }
  
  void
-fs_visitor::nir_setup_inputs(nir_shader *shader)
+fs_visitor::nir_setup_inputs()
  {
-   nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
+   nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
  
-   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
        enum brw_reg_type type = brw_type_for_base_type(var->type);
        fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
  
@@ -119,13 +116,13 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
  }
  
  void
-fs_visitor::nir_setup_outputs(nir_shader *shader)
+fs_visitor::nir_setup_outputs()
  {
     brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
  
-   nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs);
+   nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
  
-   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+   foreach_list_typed(nir_variable, var, node, &nir->outputs) {
        fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
  
        int vector_elements =
@@ -176,108 +173,20 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
  }
  
  void
-fs_visitor::nir_setup_uniforms(nir_shader *shader)
+fs_visitor::nir_setup_uniforms()
  {
     if (dispatch_width != 8)
        return;
  
-   uniforms = shader->num_uniforms;
-
-   if (shader_prog) {
-      foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
-         /* UBO's and atomics don't take up space in the uniform file */
-         if (var->interface_type != NULL || var->type->contains_atomic())
-            continue;
-
-         if (strncmp(var->name, "gl_", 3) == 0)
-            nir_setup_builtin_uniform(var);
-         else
-            nir_setup_uniform(var);
-         if(type_size_scalar(var->type) > 0)
-            param_size[var->data.driver_location] = type_size_scalar(var->type);
-      }
-   } else {
-      /* prog_to_nir only creates a single giant uniform variable so we can
-       * just set param up directly. */
-      for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) {
-         for (unsigned int i = 0; i < 4; i++) {
-            stage_prog_data->param[4 * p + i] =
-               &prog->Parameters->ParameterValues[p][i];
-         }
-      }
-      if(prog->Parameters->NumParameters > 0)
-         param_size[0] = prog->Parameters->NumParameters * 4;
-   }
-}
+   uniforms = nir->num_uniforms;
  
-void
-fs_visitor::nir_setup_uniform(nir_variable *var)
-{
-   int namelen = strlen(var->name);
-
-   /* The data for our (non-builtin) uniforms is stored in a series of
-      * gl_uniform_driver_storage structs for each subcomponent that
-      * glGetUniformLocation() could name.  We know it's been set up in the
-      * same order we'd walk the type, so walk the list of storage and find
-      * anything with our name, or the prefix of a component that starts with
-      * our name.
-      */
-   unsigned index = var->data.driver_location;
-   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
-      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
-      if (storage->builtin)
-              continue;
-
-      if (strncmp(var->name, storage->name, namelen) != 0 ||
-         (storage->name[namelen] != 0 &&
-         storage->name[namelen] != '.' &&
-         storage->name[namelen] != '[')) {
+   foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
+      /* UBO's and atomics don't take up space in the uniform file */
+      if (var->interface_type != NULL || var->type->contains_atomic())
           continue;
-      }
-
-      if (storage->type->is_image()) {
-         setup_image_uniform_values(index, storage);
-      } else {
-         unsigned slots = storage->type->component_slots();
-         if (storage->array_elements)
-            slots *= storage->array_elements;
  
-         for (unsigned i = 0; i < slots; i++) {
-            stage_prog_data->param[index++] = &storage->storage[i];
-         }
-      }
-   }
-}
-
-void
-fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
-{
-   const nir_state_slot *const slots = var->state_slots;
-   assert(var->state_slots != NULL);
-
-   unsigned uniform_index = var->data.driver_location;
-   for (unsigned int i = 0; i < var->num_state_slots; i++) {
-      /* This state reference has already been setup by ir_to_mesa, but we'll
-       * get the same index back here.
-       */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
-                                            (gl_state_index *)slots[i].tokens);
-
-      /* Add each of the unique swizzles of the element as a parameter.
-       * This'll end up matching the expected layout of the
-       * array/matrix/structure we're trying to fill in.
-       */
-      int last_swiz = -1;
-      for (unsigned int j = 0; j < 4; j++) {
-         int swiz = GET_SWZ(slots[i].swizzle, j);
-         if (swiz == last_swiz)
-            break;
-         last_swiz = swiz;
-
-         stage_prog_data->param[uniform_index++] =
-            &prog->Parameters->ParameterValues[index][swiz];
-      }
+      if (type_size_scalar(var->type) > 0)
+         param_size[var->data.driver_location] = type_size_scalar(var->type);
     }
  }
  
@@ -363,10 +272,10 @@ emit_system_values_block(nir_block *block, void *void_visitor)
  }
  
  void
-fs_visitor::nir_emit_system_values(nir_shader *shader)
+fs_visitor::nir_emit_system_values()
  {
     nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
-   nir_foreach_overload(shader, overload) {
+   nir_foreach_overload(nir, overload) {
        assert(strcmp(overload->function->name, "main") == 0);
        assert(overload->impl);
        nir_foreach_block(overload->impl, emit_system_values_block, this);
@@ -1540,7 +1449,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
            */
           brw_mark_surface_used(prog_data,
                                 stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        if (has_indirect) {
@@ -1603,7 +1512,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
            */
           brw_mark_surface_used(prog_data,
                                 stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        /* Get the offset to read from */
@@ -1796,7 +1705,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
  
           brw_mark_surface_used(prog_data,
                                 stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        /* Offset */
@@ -1913,8 +1822,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
        int reg_width = dispatch_width / 8;
  
-      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
-
        /* Set LOD = 0 */
        fs_reg source = fs_reg(0);
  
@@ -1990,7 +1897,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
         */
        brw_mark_surface_used(prog_data,
                              stage_prog_data->binding_table.ubo_start +
-                            shader_prog->NumBufferInterfaceBlocks - 1);
+                            nir->info.num_ssbos - 1);
     }
  
     fs_reg offset = get_nir_src(instr->src[1]);
@@ -2023,12 +1930,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
     unsigned sampler = stage_prog_data->bind_map[set].index[binding];
     fs_reg sampler_reg(sampler);
  
-   /* FINISHME: We're failing to recompile our programs when the sampler is
-    * updated.  This only matters for the texture rectangle scale parameters
-    * (pre-gen6, or gen6+ with GL_CLAMP).
-    */
-   int texunit = prog->SamplerUnits[sampler];
-
     int gather_component = instr->component;
  
     bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
@@ -2169,7 +2070,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
     emit_texture(op, dest_type, coordinate, instr->coord_components,
                  shadow_comparitor, lod, lod2, lod_components, sample_index,
                  tex_offset, mcs, gather_component,
-                is_cube_array, is_rect, sampler, sampler_reg, texunit);
+                is_cube_array, is_rect, sampler, sampler_reg);
  
     fs_reg dest = get_nir_dest(instr->dest);
     dest.type = this->result.type;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 47d7ae4f57c56f39c70b68e1a0ad47e8af2d51c8..df1a7ed9b5904e512fa72ff359e3947e7361a9cf 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -79,7 +79,7 @@ fs_visitor::emit_vs_system_value(int location)
  
  fs_reg
  fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
-                             bool is_rect, uint32_t sampler, int texunit)
+                             bool is_rect, uint32_t sampler)
  {
     bool needs_gl_clamp = true;
     fs_reg scale_x, scale_y;
@@ -93,10 +93,16 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
          (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) ||
                                 key_tex->gl_clamp_mask[1] & (1 << sampler))))) {
        struct gl_program_parameter_list *params = prog->Parameters;
+
+
+      /* FINISHME: We're failing to recompile our programs when the sampler is
+       * updated.  This only matters for the texture rectangle scale
+       * parameters (pre-gen6, or gen6+ with GL_CLAMP).
+       */
        int tokens[STATE_LENGTH] = {
          STATE_INTERNAL,
          STATE_TEXRECT_SCALE,
-        texunit,
+        prog->SamplerUnits[sampler],
          0,
          0
        };
@@ -221,7 +227,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
                           bool is_cube_array,
                           bool is_rect,
                           uint32_t sampler,
-                         fs_reg sampler_reg, int texunit)
+                         fs_reg sampler_reg)
  {
     fs_inst *inst = NULL;
  
@@ -256,7 +262,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
         * samplers.  This should only be a problem with GL_CLAMP on Gen7.
         */
        coordinate = rescale_texcoord(coordinate, coord_components, is_rect,
-                                    sampler, texunit);
+                                    sampler);
     }
  
     /* Writemasking doesn't eliminate channels on SIMD8 texture
@@ -692,7 +698,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
     fs_reg src_depth;
  
     if (source_depth_to_render_target) {
-      if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
           src_depth = frag_depth;
        else
           src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
@@ -1060,16 +1066,14 @@ fs_visitor::emit_barrier()
  
  fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
                         void *mem_ctx,
-                       gl_shader_stage stage,
                         const void *key,
                         struct brw_stage_prog_data *prog_data,
-                       struct gl_shader_program *shader_prog,
                         struct gl_program *prog,
+                       nir_shader *shader,
                         unsigned dispatch_width,
                         int shader_time_index)
-   : backend_shader(compiler, log_data, mem_ctx,
-                    shader_prog, prog, prog_data, stage),
-     key(key), prog_data(prog_data),
+   : backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
+     key(key), prog_data(prog_data), prog(prog),
       dispatch_width(dispatch_width),
       shader_time_index(shader_time_index),
       promoted_constants(0),
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c

index 0119a906e9ea1de3c379107bce595b2700443f64..4d0b125ffe4024b651bd06ebfb086f16fe2b42d2 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -32,6 +32,24 @@
  #include "brw_vec4_gs_visitor.h"
  #include "brw_state.h"
  #include "brw_ff_gs.h"
+#include "brw_nir.h"
+
+static void
+assign_gs_binding_table_offsets(const struct brw_device_info *devinfo,
+                                const struct gl_shader_program *shader_prog,
+                                const struct gl_program *prog,
+                                struct brw_gs_prog_data *prog_data)
+{
+   /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
+    * feedback surfaces.
+    */
+   uint32_t reserved = devinfo->gen == 6 ? BRW_MAX_SOL_BINDINGS : 0;
+
+   brw_assign_common_binding_table_offsets(MESA_SHADER_GEOMETRY, devinfo,
+                                           shader_prog, prog,
+                                           &prog_data->base.base,
+                                           reserved);
+}
  
  bool
  brw_compile_gs_prog(struct brw_context *brw,
@@ -55,6 +73,9 @@ brw_compile_gs_prog(struct brw_context *brw,
  
     c.prog_data.invocations = gp->program.Invocations;
  
+   assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+                                   &gp->program.Base, &c.prog_data);
+
     /* Allocate the references to the uniforms that will end up in the
      * prog_data associated with the compiled program, and which will be freed
      * by the state cache.
@@ -64,9 +85,7 @@ brw_compile_gs_prog(struct brw_context *brw,
      * every uniform is a float which gets padded to the size of a vec4.
      */
     struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
-   int param_count = gs->num_uniform_components * 4;
-
-   param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
+   int param_count = gp->program.Base.nir->num_uniforms * 4;
  
     c.prog_data.base.base.param =
        rzalloc_array(NULL, const gl_constant_value *, param_count);
@@ -77,6 +96,9 @@ brw_compile_gs_prog(struct brw_context *brw,
     c.prog_data.base.base.nr_params = param_count;
     c.prog_data.base.base.nr_image_params = gs->NumImages;
  
+   brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
+                               &c.prog_data.base.base, false);
+
     if (brw->gen >= 8) {
        c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
           nir_gs_count_vertices(gp->program.Base.nir);
@@ -418,24 +440,3 @@ brw_gs_precompile(struct gl_context *ctx,
  
     return success;
  }
-
-
-bool
-brw_gs_prog_data_compare(const void *in_a, const void *in_b)
-{
-   const struct brw_gs_prog_data *a = in_a;
-   const struct brw_gs_prog_data *b = in_b;
-
-   /* Compare the base structure. */
-   if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
-      return false;
-
-   /* Compare the rest of the struct. */
-   const unsigned offset = sizeof(struct brw_stage_prog_data);
-   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
-              sizeof(struct brw_gs_prog_data) - offset)) {
-      return false;
-   }
-
-   return true;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c

index 35855092dfff4f8cc4f69bd4a96d527d588f4b9f..bae44d5aa249bcb7026af68c7c02bb00fd1fd121 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,6 +27,24 @@
  #include "glsl/nir/glsl_to_nir.h"
  #include "program/prog_to_nir.h"
  
+static void
+brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
+{
+   nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+                            is_scalar ? type_size_scalar : type_size_vec4);
+}
+
+static void
+brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
+{
+   if (is_scalar) {
+      nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
+   } else {
+      foreach_list_typed(nir_variable, var, node, &nir->outputs)
+         var->data.driver_location = var->data.location;
+   }
+}
+
  static void
  nir_optimize(nir_shader *nir, bool is_scalar)
  {
@@ -80,12 +98,11 @@ brw_create_nir(struct brw_context *brw,
     struct gl_context *ctx = &brw->ctx;
     const nir_shader_compiler_options *options =
        ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-   struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
     nir_shader *nir;
  
     /* First, lower the GLSL IR or Mesa IR to NIR */
     if (shader_prog) {
-      nir = glsl_to_nir(shader, options);
+      nir = glsl_to_nir(shader_prog, stage, options);
     } else {
        nir = prog_to_nir(prog, options);
        nir_convert_to_ssa(nir); /* turn registers into SSA */
@@ -142,26 +159,12 @@ brw_process_nir(nir_shader *nir,
     /* Get rid of split copies */
     nir_optimize(nir, is_scalar);
  
-   if (is_scalar) {
-      nir_assign_var_locations(&nir->uniforms,
-                               &nir->num_uniforms,
-                               type_size_scalar);
-      nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);
-      nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
-      nir_lower_io(nir, type_size_scalar);
-   } else {
-      nir_assign_var_locations(&nir->uniforms,
-                               &nir->num_uniforms,
-                               type_size_vec4);
-
-      nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_vec4);
-
-      foreach_list_typed(nir_variable, var, node, &nir->outputs)
-         var->data.driver_location = var->data.location;
-
-      nir_lower_io(nir, type_size_vec4);
-   }
-
+   brw_nir_lower_inputs(nir, is_scalar);
+   brw_nir_lower_outputs(nir, is_scalar);
+   nir_assign_var_locations(&nir->uniforms,
+                            &nir->num_uniforms,
+                            is_scalar ? type_size_scalar : type_size_vec4);
+   nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4);
     nir_validate_shader(nir);
  
     nir_remove_dead_variables(nir);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h

index 5a1358890cc87555cbf8f089f29e65562eb8bdb3..19e5552754584f62fca91337f9b82139d4287802 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -91,6 +91,15 @@ brw_process_nir(nir_shader *nir,
                  const struct gl_shader_program *shader_prog,
                  gl_shader_stage stage, bool is_scalar);
  
+void brw_nir_setup_glsl_uniforms(nir_shader *shader,
+                                 struct gl_shader_program *shader_prog,
+                                 const struct gl_program *prog,
+                                 struct brw_stage_prog_data *stage_prog_data,
+                                 bool is_scalar);
+
+void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+                                struct brw_stage_prog_data *stage_prog_data);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp

new file mode 100644 (file)

index 0000000..f19d746
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_shader.h"
+#include "brw_nir.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+
+static void
+brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
+                                   const struct gl_program *prog,
+                                   struct brw_stage_prog_data *stage_prog_data,
+                                   unsigned comps_per_unit)
+{
+   const nir_state_slot *const slots = var->state_slots;
+   assert(var->state_slots != NULL);
+
+   unsigned uniform_index = var->data.driver_location * comps_per_unit;
+   for (unsigned int i = 0; i < var->num_state_slots; i++) {
+      /* This state reference has already been setup by ir_to_mesa, but we'll
+       * get the same index back here.
+       */
+      int index = _mesa_add_state_reference(prog->Parameters,
+                                           (gl_state_index *)slots[i].tokens);
+
+      /* Add each of the unique swizzles of the element as a parameter.
+       * This'll end up matching the expected layout of the
+       * array/matrix/structure we're trying to fill in.
+       */
+      int last_swiz = -1;
+      for (unsigned j = 0; j < 4; j++) {
+         int swiz = GET_SWZ(slots[i].swizzle, j);
+
+         /* If we hit a pair of identical swizzles, this means we've hit the
+          * end of the builtin variable.  In scalar mode, we should just quit
+          * and move on to the next one.  In vec4, we need to continue and pad
+          * it out to 4 components.
+          */
+         if (swiz == last_swiz && comps_per_unit == 1)
+            break;
+
+         last_swiz = swiz;
+
+         stage_prog_data->param[uniform_index++] =
+            &prog->Parameters->ParameterValues[index][swiz];
+      }
+   }
+}
+
+static void
+brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
+                           struct gl_shader_program *shader_prog,
+                           struct brw_stage_prog_data *stage_prog_data,
+                           unsigned comps_per_unit)
+{
+   int namelen = strlen(var->name);
+
+   /* The data for our (non-builtin) uniforms is stored in a series of
+    * gl_uniform_driver_storage structs for each subcomponent that
+    * glGetUniformLocation() could name.  We know it's been set up in the same
+    * order we'd walk the type, so walk the list of storage and find anything
+    * with our name, or the prefix of a component that starts with our name.
+    */
+   unsigned uniform_index = var->data.driver_location * comps_per_unit;
+   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
+      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+
+      if (storage->builtin)
+         continue;
+
+      if (strncmp(var->name, storage->name, namelen) != 0 ||
+          (storage->name[namelen] != 0 &&
+           storage->name[namelen] != '.' &&
+           storage->name[namelen] != '[')) {
+         continue;
+      }
+
+      if (storage->type->is_image()) {
+         brw_setup_image_uniform_values(stage, stage_prog_data,
+                                        uniform_index, storage);
+      } else {
+         gl_constant_value *components = storage->storage;
+         unsigned vector_count = (MAX2(storage->array_elements, 1) *
+                                  storage->type->matrix_columns);
+         unsigned vector_size = storage->type->vector_elements;
+
+         for (unsigned s = 0; s < vector_count; s++) {
+            unsigned i;
+            for (i = 0; i < vector_size; i++) {
+               stage_prog_data->param[uniform_index++] = components++;
+            }
+
+            /* Pad out with zeros if needed (only needed for vec4) */
+            for (; i < comps_per_unit; i++) {
+               static const gl_constant_value zero = { 0.0 };
+               stage_prog_data->param[uniform_index++] = &zero;
+            }
+         }
+      }
+   }
+}
+
+void
+brw_nir_setup_glsl_uniforms(nir_shader *shader,
+                            struct gl_shader_program *shader_prog,
+                            const struct gl_program *prog,
+                            struct brw_stage_prog_data *stage_prog_data,
+                            bool is_scalar)
+{
+   unsigned comps_per_unit = is_scalar ? 1 : 4;
+
+   foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
+      /* UBO's, atomics and samplers don't take up space in the
+         uniform file */
+      if (var->interface_type != NULL || var->type->contains_atomic())
+         continue;
+
+      if (strncmp(var->name, "gl_", 3) == 0) {
+         brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
+                                            comps_per_unit);
+      } else {
+         brw_nir_setup_glsl_uniform(shader->stage, var, shader_prog,
+                                    stage_prog_data, comps_per_unit);
+      }
+   }
+}
+
+void
+brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+                           struct brw_stage_prog_data *stage_prog_data)
+{
+   struct gl_program_parameter_list *plist = prog->Parameters;
+
+#ifndef NDEBUG
+   if (!shader->uniforms.is_empty()) {
+      /* For ARB programs, only a single "parameters" variable is generated to
+       * support uniform data.
+       */
+      assert(shader->uniforms.length() == 1);
+      nir_variable *var = (nir_variable *) shader->uniforms.get_head();
+      assert(strcmp(var->name, "parameters") == 0);
+      assert(var->type->array_size() == (int)plist->NumParameters);
+   }
+#endif
+
+   for (unsigned p = 0; p < plist->NumParameters; p++) {
+      /* Parameters should be either vec4 uniforms or single component
+       * constants; matrices and other larger types should have been broken
+       * down earlier.
+       */
+      assert(plist->Parameters[p].Size <= 4);
+
+      unsigned i;
+      for (i = 0; i < plist->Parameters[p].Size; i++) {
+         stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i];
+      }
+      for (; i < 4; i++) {
+         static const gl_constant_value zero = { 0.0 };
+         stage_prog_data->param[4 * p + i] = &zero;
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c

index fa59338950a55fa62da3f9b7500dd7c8054b4070..0a9a99edf2d5b6bccd2f4810b63946a187e9f7f1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -142,9 +142,7 @@ brwProgramStringNotify(struct gl_context *ctx,
  
        brw_add_texrect_params(prog);
  
-      if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
-      }
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
  
        brw_fs_precompile(ctx, NULL, prog);
        break;
@@ -168,10 +166,8 @@ brwProgramStringNotify(struct gl_context *ctx,
  
        brw_add_texrect_params(prog);
  
-      if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
-                                    brw->intelScreen->compiler->scalar_vs);
-      }
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+                                 brw->intelScreen->compiler->scalar_vs);
  
        brw_vs_precompile(ctx, NULL, prog);
        break;
@@ -544,23 +540,6 @@ brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
        MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
  }
  
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
-                            const struct brw_stage_prog_data *b)
-{
-   /* Compare all the struct up to the pointers. */
-   if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
-      return false;
-
-   if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
-      return false;
-
-   if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
-      return false;
-
-   return true;
-}
-
  void
  brw_stage_prog_data_free(const void *p)
  {
@@ -568,6 +547,7 @@ brw_stage_prog_data_free(const void *p)
  
     ralloc_free(prog_data->param);
     ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->image_param);
  }
  
  void
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h

index 72d68d869fc1e5aa730c3b15057545ffb4d27bef..cf0522a8b10f46e5c85498b9e808ffbdb65e293e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -169,10 +169,6 @@ void
  brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
                        unsigned surf_index);
  
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
-                            const struct brw_stage_prog_data *b);
-
  void
  brw_stage_prog_data_free(const void *prog_data);
  
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp

index 1060d93ae6ba328e984db9bb55bd5e54a69f5609..8bc7d9d6aac6fa69154bfe3a6976109cba4cda71 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -72,6 +72,20 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
     va_end(args);
  }
  
+static bool
+is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
+{
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT:
+   case MESA_SHADER_COMPUTE:
+      return true;
+   case MESA_SHADER_VERTEX:
+      return compiler->scalar_vs;
+   default:
+      return false;
+   }
+}
+
  struct brw_compiler *
  brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
  {
@@ -120,19 +134,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
        compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
        compiler->glsl_compiler_options[i].LowerClipDistance = true;
  
-      bool is_scalar;
-      switch (i) {
-      case MESA_SHADER_FRAGMENT:
-      case MESA_SHADER_COMPUTE:
-         is_scalar = true;
-         break;
-      case MESA_SHADER_VERTEX:
-         is_scalar = compiler->scalar_vs;
-         break;
-      default:
-         is_scalar = false;
-         break;
-      }
+      bool is_scalar = is_scalar_shader_stage(compiler, i);
  
        compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
        compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
@@ -142,8 +144,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
        if (devinfo->gen < 7)
           compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
  
-      if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
-         compiler->glsl_compiler_options[i].NirOptions = nir_options;
+      compiler->glsl_compiler_options[i].NirOptions = nir_options;
     }
  
     return compiler;
@@ -194,20 +195,6 @@ brw_shader_precompile(struct gl_context *ctx,
     return true;
  }
  
-static inline bool
-is_scalar_shader_stage(struct brw_context *brw, int stage)
-{
-   switch (stage) {
-   case MESA_SHADER_FRAGMENT:
-   case MESA_SHADER_COMPUTE:
-      return true;
-   case MESA_SHADER_VERTEX:
-      return brw->intelScreen->compiler->scalar_vs;
-   default:
-      return false;
-   }
-}
-
  static void
  brw_lower_packing_builtins(struct brw_context *brw,
                             gl_shader_stage shader_type,
@@ -218,7 +205,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
             | LOWER_PACK_UNORM_2x16
             | LOWER_UNPACK_UNORM_2x16;
  
-   if (is_scalar_shader_stage(brw, shader_type)) {
+   if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
        ops |= LOWER_UNPACK_UNORM_4x8
             | LOWER_UNPACK_SNORM_4x8
             | LOWER_PACK_UNORM_4x8
@@ -231,7 +218,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
         * lowering is needed. For SOA code, the Half2x16 ops must be
         * scalarized.
         */
-      if (is_scalar_shader_stage(brw, shader_type)) {
+      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
           ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
               |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
        }
@@ -285,8 +272,6 @@ process_glsl_ir(gl_shader_stage stage,
     brw_lower_texture_gradients(brw, shader->ir);
     do_vec_index_to_cond_assign(shader->ir);
     lower_vector_insert(shader->ir, true);
-   if (options->NirOptions == NULL)
-      brw_do_cubemap_normalize(shader->ir);
     lower_offset_arrays(shader->ir);
     brw_do_lower_unnormalized_offset(shader->ir);
     lower_noise(shader->ir);
@@ -312,7 +297,7 @@ process_glsl_ir(gl_shader_stage stage,
     do {
        progress = false;
  
-      if (is_scalar_shader_stage(brw, shader->Stage)) {
+      if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
           brw_do_channel_expressions(shader->ir);
           brw_do_vector_splitting(shader->ir);
        }
@@ -350,13 +335,11 @@ GLboolean
  brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
  {
     struct brw_context *brw = brw_context(ctx);
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
     unsigned int stage;
  
     for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
        struct gl_shader *shader = shProg->_LinkedShaders[stage];
-      const struct gl_shader_compiler_options *options =
-         &ctx->Const.ShaderCompilerOptions[stage];
-
        if (!shader)
          continue;
  
@@ -404,10 +387,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
  
        brw_add_texrect_params(prog);
  
-      if (options->NirOptions) {
-         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
-                                    is_scalar_shader_stage(brw, stage));
-      }
+      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+                                 is_scalar_shader_stage(compiler, stage));
  
        _mesa_reference_program(ctx, &prog, NULL);
     }
@@ -917,21 +898,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
  backend_shader::backend_shader(const struct brw_compiler *compiler,
                                 void *log_data,
                                 void *mem_ctx,
-                               struct gl_shader_program *shader_prog,
-                               struct gl_program *prog,
-                               struct brw_stage_prog_data *stage_prog_data,
-                               gl_shader_stage stage)
+                               nir_shader *shader,
+                               struct brw_stage_prog_data *stage_prog_data)
     : compiler(compiler),
       log_data(log_data),
       devinfo(compiler->devinfo),
-     shader(shader_prog ?
-        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
-     shader_prog(shader_prog),
-     prog(prog),
+     nir(shader),
       stage_prog_data(stage_prog_data),
       mem_ctx(mem_ctx),
       cfg(NULL),
-     stage(stage)
+     stage(shader->stage)
  {
     debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
     stage_name = _mesa_shader_stage_to_string(stage);
@@ -1374,16 +1350,25 @@ backend_shader::invalidate_cfg()
   * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
   */
  void
-backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
+brw_assign_common_binding_table_offsets(gl_shader_stage stage,
+                                        const struct brw_device_info *devinfo,
+                                        const struct gl_shader_program *shader_prog,
+                                        const struct gl_program *prog,
+                                        struct brw_stage_prog_data *stage_prog_data,
+                                        uint32_t next_binding_table_offset)
  {
+   const struct gl_shader *shader = NULL;
     int num_textures = _mesa_fls(prog->SamplersUsed);
  
+   if (shader_prog)
+      shader = shader_prog->_LinkedShaders[stage];
+
     stage_prog_data->binding_table.texture_start = next_binding_table_offset;
     next_binding_table_offset += num_textures;
  
     if (shader) {
        stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
-      next_binding_table_offset += shader->base.NumUniformBlocks;
+      next_binding_table_offset += shader->NumUniformBlocks;
     } else {
        stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
     }
@@ -1414,9 +1399,9 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_
        stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
     }
  
-   if (shader && shader->base.NumImages) {
+   if (shader && shader->NumImages) {
        stage_prog_data->binding_table.image_start = next_binding_table_offset;
-      next_binding_table_offset += shader->base.NumImages;
+      next_binding_table_offset += shader->NumImages;
     } else {
        stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
     }
@@ -1430,32 +1415,50 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_
     /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
  }
  
+static void
+setup_vec4_uniform_value(const gl_constant_value **params,
+                         const gl_constant_value *values,
+                         unsigned n)
+{
+   static const gl_constant_value zero = { 0 };
+
+   for (unsigned i = 0; i < n; ++i)
+      params[i] = &values[i];
+
+   for (unsigned i = n; i < 4; ++i)
+      params[i] = &zero;
+}
+
  void
-backend_shader::setup_image_uniform_values(unsigned param_offset,
-                                           const gl_uniform_storage *storage)
+brw_setup_image_uniform_values(gl_shader_stage stage,
+                               struct brw_stage_prog_data *stage_prog_data,
+                               unsigned param_start_index,
+                               const gl_uniform_storage *storage)
  {
-   const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
+   const gl_constant_value **param =
+      &stage_prog_data->param[param_start_index];
  
     for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
        const unsigned image_idx = storage->image[stage].index + i;
-      const brw_image_param *param = &stage_prog_data->image_param[image_idx];
+      const brw_image_param *image_param =
+         &stage_prog_data->image_param[image_idx];
  
        /* Upload the brw_image_param structure.  The order is expected to match
         * the BRW_IMAGE_PARAM_*_OFFSET defines.
         */
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
-         (const gl_constant_value *)&param->surface_idx, 1);
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-         (const gl_constant_value *)param->offset, 2);
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET,
-         (const gl_constant_value *)param->size, 3);
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-         (const gl_constant_value *)param->stride, 4);
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET,
-         (const gl_constant_value *)param->tiling, 3);
-      setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-         (const gl_constant_value *)param->swizzling, 2);
-      param_offset += BRW_IMAGE_PARAM_SIZE;
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+         (const gl_constant_value *)&image_param->surface_idx, 1);
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+         (const gl_constant_value *)image_param->offset, 2);
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+         (const gl_constant_value *)image_param->size, 3);
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+         (const gl_constant_value *)image_param->stride, 4);
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+         (const gl_constant_value *)image_param->tiling, 3);
+      setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+         (const gl_constant_value *)image_param->swizzling, 2);
+      param += BRW_IMAGE_PARAM_SIZE;
  
        brw_mark_surface_used(
           stage_prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h

index ccccf4d6938af19d0e824acdf57757ef3012a6f9..fd96740526b855e5006b4e14171ced7eb35cd15a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -24,6 +24,7 @@
  #include <stdint.h>
  #include "brw_reg.h"
  #include "brw_defines.h"
+#include "brw_context.h"
  #include "main/compiler.h"
  #include "glsl/ir.h"
  #include "program/prog_parameter.h"
@@ -224,10 +225,8 @@ protected:
     backend_shader(const struct brw_compiler *compiler,
                    void *log_data,
                    void *mem_ctx,
-                  struct gl_shader_program *shader_prog,
-                  struct gl_program *prog,
-                  struct brw_stage_prog_data *stage_prog_data,
-                  gl_shader_stage stage);
+                  nir_shader *shader,
+                  struct brw_stage_prog_data *stage_prog_data);
  
  public:
  
@@ -235,9 +234,7 @@ public:
     void *log_data; /* Passed to compiler->*_log functions */
  
     const struct brw_device_info * const devinfo;
-   struct brw_shader * const shader;
-   struct gl_shader_program * const shader_prog;
-   struct gl_program * const prog;
+   nir_shader *nir;
     struct brw_stage_prog_data * const stage_prog_data;
  
     /** ralloc context for temporary data used during compile */
@@ -266,19 +263,16 @@ public:
     void calculate_cfg();
     void invalidate_cfg();
  
-   void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
-
     virtual void invalidate_live_intervals() = 0;
-
-   virtual void setup_vec4_uniform_value(unsigned param_offset,
-                                         const gl_constant_value *values,
-                                         unsigned n) = 0;
-   void setup_image_uniform_values(unsigned param_offset,
-                                   const gl_uniform_storage *storage);
  };
  
  uint32_t brw_texture_offset(int *offsets, unsigned num_components);
  
+void brw_setup_image_uniform_values(gl_shader_stage stage,
+                                    struct brw_stage_prog_data *stage_prog_data,
+                                    unsigned param_start_index,
+                                    const gl_uniform_storage *storage);
+
  #endif /* __cplusplus */
  
  enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
@@ -296,6 +290,14 @@ extern "C" {
  struct brw_compiler *
  brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
  
+void
+brw_assign_common_binding_table_offsets(gl_shader_stage stage,
+                                        const struct brw_device_info *devinfo,
+                                        const struct gl_shader_program *shader_prog,
+                                        const struct gl_program *prog,
+                                        struct brw_stage_prog_data *stage_prog_data,
+                                        uint32_t next_binding_table_offset);
+
  bool brw_vs_precompile(struct gl_context *ctx,
                         struct gl_shader_program *shader_prog,
                         struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h

index 3b7a4330c7aea86a8f7b39ae995fe09900a4e7ea..dc2b94156730c50b99cbffaeadb0724cf563cae8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -87,6 +87,7 @@ extern const struct brw_tracked_state brw_vs_binding_table;
  extern const struct brw_tracked_state brw_wm_ubo_surfaces;
  extern const struct brw_tracked_state brw_wm_abo_surfaces;
  extern const struct brw_tracked_state brw_wm_image_surfaces;
+extern const struct brw_tracked_state brw_cs_ubo_surfaces;
  extern const struct brw_tracked_state brw_cs_abo_surfaces;
  extern const struct brw_tracked_state brw_cs_image_surfaces;
  extern const struct brw_tracked_state brw_wm_unit;
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c

index fbc041920f451793fdd0a10648d58ff65f0b9783..2fbcd1467508d48f6fbdf0c4939171d6f3c6ede4 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -350,10 +350,6 @@ brw_init_caches(struct brw_context *brw)
     if (brw->has_llc)
        drm_intel_gem_bo_map_unsynchronized(cache->bo);
  
-   cache->aux_compare[BRW_CACHE_VS_PROG] = brw_vs_prog_data_compare;
-   cache->aux_compare[BRW_CACHE_GS_PROG] = brw_gs_prog_data_compare;
-   cache->aux_compare[BRW_CACHE_FS_PROG] = brw_wm_prog_data_compare;
-   cache->aux_compare[BRW_CACHE_CS_PROG] = brw_cs_prog_data_compare;
     cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
     cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
     cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c

index 46687e342d3fbbb8fc86bb4dbe4bdf5dad87baba..79b8301954e4164e5edb8efd6515b8cf9497fc7b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
     &brw_state_base_address,
     &brw_cs_image_surfaces,
     &gen7_cs_push_constants,
+   &brw_cs_ubo_surfaces,
     &brw_cs_abo_surfaces,
     &brw_texture_surfaces,
     &brw_cs_work_groups_surface,
@@ -352,6 +353,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
     &gen8_state_base_address,
     &brw_cs_image_surfaces,
     &gen7_cs_push_constants,
+   &brw_cs_ubo_surfaces,
     &brw_cs_abo_surfaces,
     &brw_texture_surfaces,
     &brw_cs_work_groups_surface,
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c

index 268b995f92e0a244acc128be7afcf2f5bd4a4fb3..2955c8dcc2e2499b7ef178d07272dcd6bef4f920 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -282,7 +282,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
     /* When this layout is used the horizontal alignment is fixed at 64 and the
      * hardware ignores the value given in the surface state
      */
-   const unsigned int align_w = 64;
+   const unsigned int halign = 64;
  
     mt->total_height = mt->physical_height0;
     mt->total_width = 0;
@@ -292,7 +292,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
  
        intel_miptree_set_level_info(mt, level, x, 0, depth);
  
-      img_width = ALIGN(width, align_w);
+      img_width = ALIGN(width, halign);
  
        mt->total_width = MAX2(mt->total_width, x + img_width);
  
@@ -328,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
         unsigned mip1_width;
  
         if (mt->compressed) {
-          mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
+          mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
               ALIGN_NPOT(minify(mt->physical_width0, 2), bw);
         } else {
-          mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
+          mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
               minify(mt->physical_width0, 2);
         }
  
@@ -348,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
  
        intel_miptree_set_level_info(mt, level, x, y, depth);
  
-      img_height = ALIGN_NPOT(height, mt->align_h);
+      img_height = ALIGN_NPOT(height, mt->valign);
        if (mt->compressed)
          img_height /= bh;
  
@@ -365,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
        /* Layout_below: step right after second mipmap.
         */
        if (level == mt->first_level + 1) {
-        x += ALIGN_NPOT(width, mt->align_w) / bw;
+        x += ALIGN_NPOT(width, mt->halign) / bw;
        } else {
          y += img_height;
        }
@@ -385,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
  {
     if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
         (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
-      return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w);
+      return ALIGN_NPOT(minify(mt->physical_width0, level), mt->halign);
     } else {
        return 0;
     }
@@ -426,13 +426,13 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
     } else if (mt->target == GL_TEXTURE_3D ||
                (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
                mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h);
+      return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);
  
     } else {
-      const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h);
-      const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h);
+      const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
+      const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);
  
-      return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
+      return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->valign;
     }
  }
  
@@ -502,9 +502,9 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
  
     for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
        unsigned img_height;
-      img_height = ALIGN_NPOT(height, mt->align_h);
+      img_height = ALIGN_NPOT(height, mt->valign);
        if (mt->compressed)
-         img_height /= mt->align_h;
+         img_height /= mt->valign;
  
        for (unsigned q = 0; q < mt->level[level].depth; q++) {
           if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
@@ -537,8 +537,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
        unsigned WL = MAX2(mt->physical_width0 >> level, 1);
        unsigned HL = MAX2(mt->physical_height0 >> level, 1);
        unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
-      unsigned wL = ALIGN_NPOT(WL, mt->align_w);
-      unsigned hL = ALIGN_NPOT(HL, mt->align_h);
+      unsigned wL = ALIGN_NPOT(WL, mt->halign);
+      unsigned hL = ALIGN_NPOT(HL, mt->valign);
  
        if (mt->target == GL_TEXTURE_CUBE_MAP)
           DL = 6;
@@ -656,7 +656,7 @@ brw_miptree_choose_tiling(struct brw_context *brw,
      * to know that ahead of time.  And besides, since we use a vertical
      * alignment of 4 as often as we can, this shouldn't happen very often.
      */
-   if (brw->gen == 7 && mt->align_h == 2 &&
+   if (brw->gen == 7 && mt->valign == 2 &&
         brw->format_supported_as_render_target[mt->format]) {
        return I915_TILING_X;
     }
@@ -748,21 +748,21 @@ intel_miptree_set_alignment(struct brw_context *brw,
           /* Stencil uses W tiling, so we force W tiling alignment for the
            * ALL_SLICES_AT_EACH_LOD miptree layout.
            */
-         mt->align_w = 64;
-         mt->align_h = 64;
+         mt->halign = 64;
+         mt->valign = 64;
           assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
        } else {
           /* Depth uses Y tiling, so we force need Y tiling alignment for the
            * ALL_SLICES_AT_EACH_LOD miptree layout.
            */
-         mt->align_w = 128 / mt->cpp;
-         mt->align_h = 32;
+         mt->halign = 128 / mt->cpp;
+         mt->valign = 32;
        }
     } else if (mt->compressed) {
         /* The hardware alignment requirements for compressed textures
          * happen to match the block boundaries.
          */
-      _mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h);
+      _mesa_get_format_block_size(mt->format, &mt->halign, &mt->valign);
  
        /* On Gen9+ we can pick our own alignment for compressed textures but it
         * has to be a multiple of the block size. The minimum alignment we can
@@ -770,21 +770,21 @@ intel_miptree_set_alignment(struct brw_context *brw,
         * size
         */
        if (brw->gen >= 9) {
-         mt->align_w *= 4;
-         mt->align_h *= 4;
+         mt->halign *= 4;
+         mt->valign *= 4;
        }
     } else if (mt->format == MESA_FORMAT_S_UINT8) {
-      mt->align_w = 8;
-      mt->align_h = brw->gen >= 7 ? 8 : 4;
+      mt->halign = 8;
+      mt->valign = brw->gen >= 7 ? 8 : 4;
     } else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
        /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
         * vertical alignment < 64. */
-      mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
-      mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
+      mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
+      mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
     } else {
-      mt->align_w =
+      mt->halign =
           intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
-      mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
+      mt->valign = intel_vertical_texture_alignment_unit(brw, mt);
     }
  }
  
@@ -809,8 +809,8 @@ brw_miptree_layout(struct brw_context *brw,
     if (brw->gen >= 9) {
        unsigned int i, j;
        _mesa_get_format_block_size(mt->format, &i, &j);
-      mt->align_w /= i;
-      mt->align_h /= j;
+      mt->halign /= i;
+      mt->valign /= j;
     }
  
     if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp

index 1d62f2f6a75f609abd901cf54776086d5a7d4ef0..689c767d2d74c1314c4b9266ed45f24d7e988f6b 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -518,11 +518,11 @@ vec4_visitor::split_uniform_registers()
  void
  vec4_visitor::pack_uniform_registers()
  {
-   bool uniform_used[this->uniforms];
+   uint8_t chans_used[this->uniforms];
     int new_loc[this->uniforms];
     int new_chan[this->uniforms];
  
-   memset(uniform_used, 0, sizeof(uniform_used));
+   memset(chans_used, 0, sizeof(chans_used));
     memset(new_loc, 0, sizeof(new_loc));
     memset(new_chan, 0, sizeof(new_chan));
  
@@ -531,11 +531,36 @@ vec4_visitor::pack_uniform_registers()
      * to pull constants, and from some GLSL code generators like wine.
      */
     foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      unsigned readmask;
+      switch (inst->opcode) {
+      case VEC4_OPCODE_PACK_BYTES:
+      case BRW_OPCODE_DP4:
+      case BRW_OPCODE_DPH:
+         readmask = 0xf;
+         break;
+      case BRW_OPCODE_DP3:
+         readmask = 0x7;
+         break;
+      case BRW_OPCODE_DP2:
+         readmask = 0x3;
+         break;
+      default:
+         readmask = inst->dst.writemask;
+         break;
+      }
+
        for (int i = 0 ; i < 3; i++) {
-        if (inst->src[i].file != UNIFORM)
-           continue;
+         if (inst->src[i].file != UNIFORM)
+            continue;
  
-        uniform_used[inst->src[i].reg] = true;
+         int reg = inst->src[i].reg;
+         for (int c = 0; c < 4; c++) {
+            if (!(readmask & (1 << c)))
+               continue;
+
+            chans_used[reg] = MAX2(chans_used[reg],
+                                   BRW_GET_SWZ(inst->src[i].swizzle, c) + 1);
+         }
        }
     }
  
@@ -546,17 +571,15 @@ vec4_visitor::pack_uniform_registers()
      */
     for (int src = 0; src < uniforms; src++) {
        assert(src < uniform_array_size);
-      int size = this->uniform_vector_size[src];
+      int size = chans_used[src];
  
-      if (!uniform_used[src]) {
-        this->uniform_vector_size[src] = 0;
-        continue;
-      }
+      if (size == 0)
+         continue;
  
        int dst;
        /* Find the lowest place we can slot this uniform in. */
        for (dst = 0; dst < src; dst++) {
-        if (this->uniform_vector_size[dst] + size <= 4)
+        if (chans_used[dst] + size <= 4)
             break;
        }
  
@@ -565,7 +588,7 @@ vec4_visitor::pack_uniform_registers()
          new_chan[src] = 0;
        } else {
          new_loc[src] = dst;
-        new_chan[src] = this->uniform_vector_size[dst];
+        new_chan[src] = chans_used[dst];
  
          /* Move the references to the data */
          for (int j = 0; j < size; j++) {
@@ -573,8 +596,8 @@ vec4_visitor::pack_uniform_registers()
                stage_prog_data->param[src * 4 + j];
          }
  
-        this->uniform_vector_size[dst] += size;
-        this->uniform_vector_size[src] = 0;
+        chans_used[dst] += size;
+        chans_used[src] = 0;
        }
  
        new_uniform_count = MAX2(new_uniform_count, dst + 1);
@@ -1643,7 +1666,6 @@ vec4_visitor::setup_uniforms(int reg)
      */
     if (devinfo->gen < 6 && this->uniforms == 0) {
        assert(this->uniforms < this->uniform_array_size);
-      this->uniform_vector_size[this->uniforms] = 1;
  
        stage_prog_data->param =
           reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
@@ -1685,12 +1707,6 @@ vec4_vs_visitor::setup_payload(void)
     this->first_non_payload_grf = reg;
  }
  
-void
-vec4_visitor::assign_binding_table_offsets()
-{
-   assign_common_binding_table_offsets(0);
-}
-
  src_reg
  vec4_visitor::get_timestamp()
  {
@@ -1786,31 +1802,14 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
  bool
  vec4_visitor::run()
  {
-   bool use_vec4_nir =
-      compiler->glsl_compiler_options[stage].NirOptions != NULL;
-
-   sanity_param_count = prog->Parameters->NumParameters;
-
     if (shader_time_index >= 0)
        emit_shader_time_begin();
  
-   assign_binding_table_offsets();
-
     emit_prolog();
  
-   if (use_vec4_nir) {
-      assert(prog->nir != NULL);
-      emit_nir_code();
-      if (failed)
-         return false;
-   } else if (shader) {
-      /* Generate VS IR for main().  (the visitor only descends into
-       * functions called "main").
-       */
-      visit_instructions(shader->base.ir);
-   } else {
-      emit_program_code();
-   }
+   emit_nir_code();
+   if (failed)
+      return false;
     base_ir = NULL;
  
     emit_thread_end();
@@ -1823,18 +1822,9 @@ vec4_visitor::run()
      * that we have reladdr computations available for CSE, since we'll
      * often do repeated subexpressions for those.
      */
-   if (shader || use_vec4_nir) {
-      move_grf_array_access_to_scratch();
-      move_uniform_array_access_to_pull_constants();
-   } else {
-      /* The ARB_vertex_program frontend emits pull constant loads directly
-       * rather than using reladdr, so we don't need to walk through all the
-       * instructions looking for things to move.  There isn't anything.
-       *
-       * We do still need to split things to vec4 size.
-       */
-      split_uniform_registers();
-   }
+   move_grf_array_access_to_scratch();
+   move_uniform_array_access_to_pull_constants();
+
     pack_uniform_registers();
     move_push_constants_to_pull_constants();
     split_virtual_grfs();
@@ -1845,8 +1835,8 @@ vec4_visitor::run()
                                                                         \
        if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) {  \
           char filename[64];                                            \
-         snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass,            \
-                  stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+         snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass,              \
+                  stage_abbrev, nir->info.name, iteration, pass_num);  \
                                                                         \
           backend_shader::dump_instructions(filename);                  \
        }                                                                \
@@ -1858,8 +1848,8 @@ vec4_visitor::run()
  
     if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
        char filename[64];
-      snprintf(filename, 64, "%s-%04d-00-start",
-               stage_abbrev, shader_prog ? shader_prog->Name : 0);
+      snprintf(filename, 64, "%s-%s-00-start",
+               stage_abbrev, nir->info.name);
  
        backend_shader::dump_instructions(filename);
     }
@@ -1933,13 +1923,6 @@ vec4_visitor::run()
           brw_get_scratch_size(last_scratch * REG_SIZE);
     }
  
-   /* If any state parameters were appended, then ParameterValues could have
-    * been realloced, in which case the driver uniform storage set up by
-    * _mesa_associate_uniform_storage() would point to freed memory.  Make
-    * sure that didn't happen.
-    */
-   assert(sanity_param_count == prog->Parameters->NumParameters);
-
     return !failed;
  }
  
@@ -1974,27 +1957,13 @@ brw_vs_emit(struct brw_context *brw,
     if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
        brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
  
-   if (!vp->Base.nir &&
-       (brw->intelScreen->compiler->scalar_vs ||
-        brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
-      /* Normally we generate NIR in LinkShader() or
-       * ProgramStringNotify(), but Mesa's fixed-function vertex program
-       * handling doesn't notify the driver at all.  Just do it here, at
-       * the last minute, even though it's lame.
-       */
-      assert(vp->Base.Id == 0 && prog == NULL);
-      vp->Base.nir =
-         brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
-                        brw->intelScreen->compiler->scalar_vs);
-   }
-
     if (brw->intelScreen->compiler->scalar_vs) {
        prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
  
        fs_visitor v(brw->intelScreen->compiler, brw,
-                   mem_ctx, MESA_SHADER_VERTEX, key,
-                   &prog_data->base.base, prog, &vp->Base,
-                   8, st_index);
+                   mem_ctx, key, &prog_data->base.base,
+                   NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
+                   vp->Base.nir, 8, st_index);
        if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
           if (prog) {
              prog->LinkStatus = false;
@@ -2031,7 +2000,7 @@ brw_vs_emit(struct brw_context *brw,
        prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
  
        vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
-                        vp, prog, brw_select_clip_planes(&brw->ctx),
+                        vp->Base.nir, brw_select_clip_planes(&brw->ctx),
                          mem_ctx, st_index,
                          !_mesa_is_gles3(&brw->ctx));
        if (!v.run()) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h

index ac9bd4a11fa899681b50bfbe9bbfea83e7cfcf2d..51b3161f6593ddb3dcaa12a682d2b714540bf6a7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -65,16 +65,14 @@ class vec4_live_variables;
   * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
   * fixed-function) into VS IR.
   */
-class vec4_visitor : public backend_shader, public ir_visitor
+class vec4_visitor : public backend_shader
  {
  public:
     vec4_visitor(const struct brw_compiler *compiler,
                  void *log_data,
-                struct gl_program *prog,
                  const struct brw_sampler_prog_key_data *key,
                  struct brw_vue_prog_data *prog_data,
-               struct gl_shader_program *shader_prog,
-                gl_shader_stage stage,
+                nir_shader *shader,
                 void *mem_ctx,
                  bool no_spills,
                  int shader_time_index);
@@ -97,8 +95,6 @@ public:
  
     const struct brw_sampler_prog_key_data * const key_tex;
     struct brw_vue_prog_data * const prog_data;
-   unsigned int sanity_param_count;
-
     char *fail_msg;
     bool failed;
  
@@ -116,66 +112,22 @@ public:
     brw::vec4_live_variables *live_intervals;
     dst_reg userplane[MAX_CLIP_PLANES];
  
-   dst_reg *variable_storage(ir_variable *var);
-
-   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
-
     bool need_all_constants_in_pull_buffer;
  
-   /**
-    * \name Visit methods
-    *
-    * As typical for the visitor pattern, there must be one \c visit method for
-    * each concrete subclass of \c ir_instruction.  Virtual base classes within
-    * the hierarchy should not have \c visit methods.
-    */
-   /*@{*/
-   virtual void visit(ir_variable *);
-   virtual void visit(ir_loop *);
-   virtual void visit(ir_loop_jump *);
-   virtual void visit(ir_function_signature *);
-   virtual void visit(ir_function *);
-   virtual void visit(ir_expression *);
-   virtual void visit(ir_swizzle *);
-   virtual void visit(ir_dereference_variable  *);
-   virtual void visit(ir_dereference_array *);
-   virtual void visit(ir_dereference_record *);
-   virtual void visit(ir_assignment *);
-   virtual void visit(ir_constant *);
-   virtual void visit(ir_call *);
-   virtual void visit(ir_return *);
-   virtual void visit(ir_discard *);
-   virtual void visit(ir_texture *);
-   virtual void visit(ir_if *);
-   virtual void visit(ir_emit_vertex *);
-   virtual void visit(ir_end_primitive *);
-   virtual void visit(ir_barrier *);
-   /*@}*/
-
-   src_reg result;
-
     /* Regs for vertex results.  Generated at ir_variable visiting time
      * for the ir->location's used.
      */
     dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
     const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
     int *uniform_size;
-   int *uniform_vector_size;
-   int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
+   int uniform_array_size; /*< Size of the uniform_size array */
     int uniforms;
  
     src_reg shader_start_time;
  
-   struct hash_table *variable_ht;
-
     bool run();
     void fail(const char *msg, ...);
  
-   virtual void setup_vec4_uniform_value(unsigned param_offset,
-                                         const gl_constant_value *values,
-                                         unsigned n);
-   void setup_uniform_values(ir_variable *ir);
-   void setup_builtin_uniform_values(ir_variable *ir);
     int setup_uniforms(int payload_reg);
  
     bool reg_allocate_trivial();
@@ -271,21 +223,9 @@ public:
  
     int implied_mrf_writes(vec4_instruction *inst);
  
-   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
-                              dst_reg dst,
-                              src_reg src,
-                              vec4_instruction *pre_rhs_inst,
-                              vec4_instruction *last_rhs_inst);
-
-   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
-   void visit_instructions(const exec_list *list);
-
     void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
                      src_reg src0, src_reg src1, src_reg one);
  
-   void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
-   void emit_if_gen6(ir_if *ir);
-
     vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                   src_reg src0, src_reg src1);
  
@@ -298,22 +238,11 @@ public:
      */
     src_reg emit_uniformize(const src_reg &src);
  
-   void emit_block_move(dst_reg *dst, src_reg *src,
-                        const struct glsl_type *type, brw_predicate predicate);
-
-   void emit_constant_values(dst_reg *dst, ir_constant *value);
-
     /**
      * Emit the correct dot-product instruction for the type of arguments
      */
     void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
  
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
-                   dst_reg dst, src_reg src0);
-
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
-                   dst_reg dst, src_reg src0, src_reg src1);
-
     src_reg fix_3src_operand(const src_reg &src);
     src_reg resolve_source_modifiers(const src_reg &src);
  
@@ -389,29 +318,20 @@ public:
     src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                  vec4_instruction *inst, src_reg src);
  
-   bool try_emit_mad(ir_expression *ir);
-   bool try_emit_b2f_of_compare(ir_expression *ir);
     void resolve_ud_negate(src_reg *reg);
-   void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
  
     src_reg get_timestamp();
  
-   bool process_move_condition(ir_rvalue *ir);
-
     void dump_instruction(backend_instruction *inst);
     void dump_instruction(backend_instruction *inst, FILE *file);
  
-   void visit_atomic_counter_intrinsic(ir_call *ir);
-
     bool is_high_sampler(src_reg sampler);
  
     virtual void emit_nir_code();
-   virtual void nir_setup_inputs(nir_shader *shader);
-   virtual void nir_setup_uniforms(nir_shader *shader);
-   virtual void nir_setup_uniform(nir_variable *var);
-   virtual void nir_setup_builtin_uniform(nir_variable *var);
+   virtual void nir_setup_inputs();
+   virtual void nir_setup_uniforms();
     virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
-   virtual void nir_setup_system_values(nir_shader *shader);
+   virtual void nir_setup_system_values();
     virtual void nir_emit_impl(nir_function_impl *impl);
     virtual void nir_emit_cf_list(exec_list *list);
     virtual void nir_emit_if(nir_if *if_stmt);
@@ -450,14 +370,11 @@ protected:
                                      bool interleaved);
     void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                     int reg_node_count);
-   virtual void assign_binding_table_offsets();
     virtual void setup_payload() = 0;
     virtual void emit_prolog() = 0;
-   virtual void emit_program_code() = 0;
     virtual void emit_thread_end() = 0;
     virtual void emit_urb_write_header(int mrf) = 0;
     virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
-   virtual int compute_array_stride(ir_dereference_array *ir);
     virtual void gs_emit_vertex(int stream_id);
     virtual void gs_end_primitive();
  
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp

index 4f4e1e12fabf0e8897fd0d579ddee5cc7b2f3408..af4c102c026dc7841d03fd31aebf366cec996e34 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -27,11 +27,11 @@
  namespace brw {
  
  void
-vec4_gs_visitor::nir_setup_inputs(nir_shader *shader)
+vec4_gs_visitor::nir_setup_inputs()
  {
-   nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+   nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
  
-   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
        int offset = var->data.driver_location;
        if (var->type->base_type == GLSL_TYPE_ARRAY) {
           /* Geometry shader inputs are arrays, but they use an unusual array
@@ -72,6 +72,10 @@ vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
     dst_reg *reg;
  
     switch (instr->intrinsic) {
+   case nir_intrinsic_load_primitive_id:
+      /* We'll just read g1 directly; don't create a temporary. */
+      break;
+
     case nir_intrinsic_load_invocation_id:
        reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
        if (reg->file == BAD_FILE)
@@ -111,6 +115,12 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
           retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
        break;
  
+   case nir_intrinsic_load_primitive_id:
+      assert(c->prog_data.include_primitive_id);
+      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
+      break;
+
     case nir_intrinsic_load_invocation_id: {
        src_reg invocation_id =
           src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp

index d2edc5782fdd90f9d4ddeeeba13e978ba8d9e20c..c673ccd137caea614a92a33d56245b0aa7fd9645 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -38,13 +38,14 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
                                   void *log_data,
                                   struct brw_gs_compile *c,
                                   struct gl_shader_program *prog,
+                                 nir_shader *shader,
                                   void *mem_ctx,
                                   bool no_spills,
                                   int shader_time_index)
-   : vec4_visitor(compiler, log_data,
-                  &c->gp->program.Base, &c->key.tex,
-                  &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
+   : vec4_visitor(compiler, log_data, &c->key.tex,
+                  &c->prog_data.base, shader,  mem_ctx,
                    no_spills, shader_time_index),
+     shader_prog(prog),
       c(c)
  {
  }
@@ -207,15 +208,6 @@ vec4_gs_visitor::emit_prolog()
     this->current_annotation = NULL;
  }
  
-
-void
-vec4_gs_visitor::emit_program_code()
-{
-   /* We don't support NV_geometry_program4. */
-   unreachable("Unreached");
-}
-
-
  void
  vec4_gs_visitor::emit_thread_end()
  {
@@ -310,24 +302,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete)
  }
  
  
-int
-vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
-{
-   /* Geometry shader inputs are arrays, but they use an unusual array layout:
-    * instead of all array elements for a given geometry shader input being
-    * stored consecutively, all geometry shader inputs are interleaved into
-    * one giant array.  At this stage of compilation, we assume that the
-    * stride of the array is BRW_VARYING_SLOT_COUNT.  Later,
-    * setup_attributes() will remap our accesses to the actual input array.
-    */
-   ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
-   if (deref_var && deref_var->var->data.mode == ir_var_shader_in)
-      return BRW_VARYING_SLOT_COUNT;
-   else
-      return vec4_visitor::compute_array_stride(ir);
-}
-
-
  /**
   * Write out a batch of 32 control data bits from the control_data_bits
   * register to the URB.
@@ -575,27 +549,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
     this->current_annotation = NULL;
  }
  
-void
-vec4_gs_visitor::visit(ir_emit_vertex *ir)
-{
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
-
-   gs_emit_vertex(ir->stream_id());
-
-   this->current_annotation = "emit vertex: increment vertex count";
-   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-            src_reg(1u)));
-
-   emit(BRW_OPCODE_ENDIF);
-}
-
  void
  vec4_gs_visitor::gs_end_primitive()
  {
@@ -647,12 +600,6 @@ vec4_gs_visitor::gs_end_primitive()
     emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
  }
  
-void
-vec4_gs_visitor::visit(ir_end_primitive *)
-{
-   gs_end_primitive();
-}
-
  static const unsigned *
  generate_assembly(struct brw_context *brw,
                    struct gl_shader_program *shader_prog,
@@ -675,12 +622,10 @@ brw_gs_emit(struct brw_context *brw,
              void *mem_ctx,
              unsigned *final_assembly_size)
  {
-   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
-      struct brw_shader *shader =
-         (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+   struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
  
-      brw_dump_ir("geometry", prog, &shader->base, NULL);
-   }
+   if (unlikely(INTEL_DEBUG & DEBUG_GS))
+      brw_dump_ir("geometry", prog, shader, NULL);
  
     int st_index = -1;
     if (INTEL_DEBUG & DEBUG_SHADER_TIME)
@@ -696,7 +641,8 @@ brw_gs_emit(struct brw_context *brw,
           c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
  
           vec4_gs_visitor v(brw->intelScreen->compiler, brw,
-                           c, prog, mem_ctx, true /* no_spills */, st_index);
+                           c, prog, shader->Program->nir,
+                           mem_ctx, true /* no_spills */, st_index);
           if (v.run()) {
              return generate_assembly(brw, prog, &c->gp->program.Base,
                                       &c->prog_data.base, mem_ctx, v.cfg,
@@ -738,11 +684,13 @@ brw_gs_emit(struct brw_context *brw,
  
     if (brw->gen >= 7)
        gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
-                               c, prog, mem_ctx, false /* no_spills */,
+                               c, prog, shader->Program->nir,
+                               mem_ctx, false /* no_spills */,
                                 st_index);
     else
        gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
-                               c, prog, mem_ctx, false /* no_spills */,
+                               c, prog, shader->Program->nir,
+                               mem_ctx, false /* no_spills */,
                                 st_index);
  
     if (!gs->run()) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h

index 0e8fefabecc6d235cc2832de1308a5dbfac66024..85d80b8fc6327b6201795ed1249d8e2357c50329 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -71,11 +71,12 @@ public:
                     void *log_data,
                     struct brw_gs_compile *c,
                     struct gl_shader_program *prog,
+                   nir_shader *shader,
                     void *mem_ctx,
                     bool no_spills,
                     int shader_time_index);
  
-   virtual void nir_setup_inputs(nir_shader *shader);
+   virtual void nir_setup_inputs();
     virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
  
  protected:
@@ -83,13 +84,9 @@ protected:
                                                const glsl_type *type);
     virtual void setup_payload();
     virtual void emit_prolog();
-   virtual void emit_program_code();
     virtual void emit_thread_end();
     virtual void emit_urb_write_header(int mrf);
     virtual vec4_instruction *emit_urb_write_opcode(bool complete);
-   virtual int compute_array_stride(ir_dereference_array *ir);
-   virtual void visit(ir_emit_vertex *);
-   virtual void visit(ir_end_primitive *);
     virtual void gs_emit_vertex(int stream_id);
     virtual void gs_end_primitive();
     virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
@@ -100,6 +97,8 @@ protected:
     void emit_control_data_bits();
     void set_stream_control_data_bits(unsigned stream_id);
  
+   struct gl_shader_program *shader_prog;
+
     src_reg vertex_count;
     src_reg control_data_bits;
     const struct brw_gs_compile * const c;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index 9d56f9a1ae0d3a00ddcb0bc9768369f4aad60442..6536b1bfeb8666d0b3caf5bccd61163185fe8c85 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -35,15 +35,13 @@ namespace brw {
  void
  vec4_visitor::emit_nir_code()
  {
-   nir_shader *nir = prog->nir;
-
     if (nir->num_inputs > 0)
-      nir_setup_inputs(nir);
+      nir_setup_inputs();
  
     if (nir->num_uniforms > 0)
-      nir_setup_uniforms(nir);
+      nir_setup_uniforms();
  
-   nir_setup_system_values(nir);
+   nir_setup_system_values();
  
     /* get the main function and emit it */
     nir_foreach_overload(nir, overload) {
@@ -105,11 +103,11 @@ setup_system_values_block(nir_block *block, void *void_visitor)
  }
  
  void
-vec4_visitor::nir_setup_system_values(nir_shader *shader)
+vec4_visitor::nir_setup_system_values()
  {
     nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
  
-   nir_foreach_overload(shader, overload) {
+   nir_foreach_overload(nir, overload) {
        assert(strcmp(overload->function->name, "main") == 0);
        assert(overload->impl);
        nir_foreach_block(overload->impl, setup_system_values_block, this);
@@ -117,11 +115,11 @@ vec4_visitor::nir_setup_system_values(nir_shader *shader)
  }
  
  void
-vec4_visitor::nir_setup_inputs(nir_shader *shader)
+vec4_visitor::nir_setup_inputs()
  {
-   nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+   nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
  
-   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
        int offset = var->data.driver_location;
        unsigned size = type_size_vec4(var->type);
        for (unsigned i = 0; i < size; i++) {
@@ -132,137 +130,17 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader)
  }
  
  void
-vec4_visitor::nir_setup_uniforms(nir_shader *shader)
-{
-   uniforms = 0;
-
-   if (shader_prog) {
-      foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
-         /* UBO's, atomics and samplers don't take up space in the
-            uniform file */
-         if (var->interface_type != NULL || var->type->contains_atomic() ||
-             type_size_vec4(var->type) == 0) {
-            continue;
-         }
-
-         assert(uniforms < uniform_array_size);
-         uniform_size[uniforms] = type_size_vec4(var->type);
-
-         if (strncmp(var->name, "gl_", 3) == 0)
-            nir_setup_builtin_uniform(var);
-         else
-            nir_setup_uniform(var);
-      }
-   } else {
-      /* For ARB_vertex_program, only a single "parameters" variable is
-       * generated to support uniform data.
-       */
-      nir_variable *var = (nir_variable *) shader->uniforms.get_head();
-      assert(shader->uniforms.length() == 1 &&
-             strcmp(var->name, "parameters") == 0);
-
-      assert(uniforms < uniform_array_size);
-      uniform_size[uniforms] = type_size_vec4(var->type);
-
-      struct gl_program_parameter_list *plist = prog->Parameters;
-      for (unsigned p = 0; p < plist->NumParameters; p++) {
-         uniform_vector_size[uniforms] = plist->Parameters[p].Size;
-
-         /* Parameters should be either vec4 uniforms or single component
-          * constants; matrices and other larger types should have been broken
-          * down earlier.
-          */
-         assert(uniform_vector_size[uniforms] <= 4);
-
-         int i;
-         for (i = 0; i < uniform_vector_size[uniforms]; i++) {
-            stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i];
-         }
-         for (; i < 4; i++) {
-            static const gl_constant_value zero = { 0.0 };
-            stage_prog_data->param[uniforms * 4 + i] = &zero;
-         }
-
-         uniforms++;
-      }
-   }
-}
-
-void
-vec4_visitor::nir_setup_uniform(nir_variable *var)
-{
-   int namelen = strlen(var->name);
-
-   /* The data for our (non-builtin) uniforms is stored in a series of
-    * gl_uniform_driver_storage structs for each subcomponent that
-    * glGetUniformLocation() could name.  We know it's been set up in the same
-    * order we'd walk the type, so walk the list of storage and find anything
-    * with our name, or the prefix of a component that starts with our name.
-    */
-    for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
-       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
-       if (storage->builtin)
-          continue;
-
-       if (strncmp(var->name, storage->name, namelen) != 0 ||
-           (storage->name[namelen] != 0 &&
-            storage->name[namelen] != '.' &&
-            storage->name[namelen] != '[')) {
-          continue;
-       }
-
-       gl_constant_value *components = storage->storage;
-       unsigned vector_count = (MAX2(storage->array_elements, 1) *
-                                storage->type->matrix_columns);
-
-       for (unsigned s = 0; s < vector_count; s++) {
-          assert(uniforms < uniform_array_size);
-          uniform_vector_size[uniforms] = storage->type->vector_elements;
-
-          int i;
-          for (i = 0; i < uniform_vector_size[uniforms]; i++) {
-             stage_prog_data->param[uniforms * 4 + i] = components;
-             components++;
-          }
-          for (; i < 4; i++) {
-             static const gl_constant_value zero = { 0.0 };
-             stage_prog_data->param[uniforms * 4 + i] = &zero;
-          }
-
-          uniforms++;
-       }
-    }
-}
-
-void
-vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
+vec4_visitor::nir_setup_uniforms()
  {
-   const nir_state_slot *const slots = var->state_slots;
-   assert(var->state_slots != NULL);
-
-   for (unsigned int i = 0; i < var->num_state_slots; i++) {
-      /* This state reference has already been setup by ir_to_mesa,
-       * but we'll get the same index back here.  We can reference
-       * ParameterValues directly, since unlike brw_fs.cpp, we never
-       * add new state references during compile.
-       */
-      int index = _mesa_add_state_reference(prog->Parameters,
-                                           (gl_state_index *)slots[i].tokens);
-      gl_constant_value *values =
-         &prog->Parameters->ParameterValues[index][0];
-
-      assert(uniforms < uniform_array_size);
+   uniforms = nir->num_uniforms;
  
-      for (unsigned j = 0; j < 4; j++)
-         stage_prog_data->param[uniforms * 4 + j] =
-            &values[GET_SWZ(slots[i].swizzle, j)];
-
-      uniform_vector_size[uniforms] =
-         (var->type->is_scalar() || var->type->is_vector() ||
-          var->type->is_matrix() ? var->type->vector_elements : 4);
+   foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
+      /* UBO's and atomics don't take up space in the uniform file */
+      if (var->interface_type != NULL || var->type->contains_atomic())
+         continue;
  
-      uniforms++;
+      if (type_size_vec4(var->type) > 0)
+         uniform_size[var->data.driver_location] = type_size_vec4(var->type);
     }
  }
  
@@ -547,8 +425,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
        nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
        unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
  
-      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
-
        src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
                                     ubo_index);
        dst_reg result_dst = get_nir_dest(instr->dest);
@@ -592,7 +468,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
  
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        /* Offset */
@@ -739,7 +615,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        src_reg offset_reg = src_reg(this, glsl_type::uint_type);
@@ -891,7 +767,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
            */
           brw_mark_surface_used(&prog_data->base,
                                 prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
+                               nir->info.num_ssbos - 1);
        }
  
        unsigned const_offset = instr->const_index[1];
@@ -923,6 +799,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
        break;
     }
  
+   case nir_intrinsic_memory_barrier: {
+      const vec4_builder bld =
+         vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+      const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+      bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+         ->regs_written = 2;
+      break;
+   }
+
     default:
        unreachable("Unknown intrinsic");
     }
@@ -952,7 +837,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr)
         */
        brw_mark_surface_used(&prog_data->base,
                              prog_data->base.binding_table.ubo_start +
-                            shader_prog->NumBufferInterfaceBlocks - 1);
+                            nir->info.num_ssbos - 1);
     }
  
     src_reg offset = get_nir_src(instr->src[1], 1);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 6d61112056ca2bd3dc81b7f204cdee9aeda665d7..f80425a5b00f273cbcbb48ca698a619f8ca0fc08 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -580,15 +580,6 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
     emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
  }
  
-void
-vec4_visitor::visit_instructions(const exec_list *list)
-{
-   foreach_in_list(ir_instruction, ir, list) {
-      base_ir = ir;
-      ir->accept(this);
-   }
-}
-
  /**
   * Returns the minimum number of vec4 elements needed to pack a type.
   *
@@ -680,1808 +671,161 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
     this->swizzle = BRW_SWIZZLE_NOOP;
  
     this->type = brw_type_for_base_type(type);
-}
-
-dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
-{
-   init();
-
-   this->file = GRF;
-   this->reg = v->alloc.allocate(type_size_vec4(type));
-
-   if (type->is_array() || type->is_record()) {
-      this->writemask = WRITEMASK_XYZW;
-   } else {
-      this->writemask = (1 << type->vector_elements) - 1;
-   }
-
-   this->type = brw_type_for_base_type(type);
-}
-
-void
-vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
-                                       const gl_constant_value *values,
-                                       unsigned n)
-{
-   static const gl_constant_value zero = { 0 };
-
-   assert(param_offset % 4 == 0);
-
-   for (unsigned i = 0; i < n; ++i)
-      stage_prog_data->param[param_offset + i] = &values[i];
-
-   for (unsigned i = n; i < 4; ++i)
-      stage_prog_data->param[param_offset + i] = &zero;
-
-   uniform_vector_size[param_offset / 4] = n;
-}
-
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
- */
-void
-vec4_visitor::setup_uniform_values(ir_variable *ir)
-{
-   int namelen = strlen(ir->name);
-
-   /* The data for our (non-builtin) uniforms is stored in a series of
-    * gl_uniform_driver_storage structs for each subcomponent that
-    * glGetUniformLocation() could name.  We know it's been set up in the same
-    * order we'd walk the type, so walk the list of storage and find anything
-    * with our name, or the prefix of a component that starts with our name.
-    */
-   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
-      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
-      if (storage->builtin)
-         continue;
-
-      if (strncmp(ir->name, storage->name, namelen) != 0 ||
-          (storage->name[namelen] != 0 &&
-           storage->name[namelen] != '.' &&
-           storage->name[namelen] != '[')) {
-         continue;
-      }
-
-      const unsigned vector_count = (MAX2(storage->array_elements, 1) *
-                                     storage->type->matrix_columns);
-      const unsigned vector_size = storage->type->vector_elements;
-
-      for (unsigned s = 0; s < vector_count; s++) {
-         setup_vec4_uniform_value(uniforms * 4,
-                                  &storage->storage[s * vector_size],
-                                  vector_size);
-         uniforms++;
-      }
-   }
-}
-
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
-   const ir_state_slot *const slots = ir->get_state_slots();
-   assert(slots != NULL);
-
-   for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
-      /* This state reference has already been setup by ir_to_mesa,
-       * but we'll get the same index back here.  We can reference
-       * ParameterValues directly, since unlike brw_fs.cpp, we never
-       * add new state references during compile.
-       */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
-                                           (gl_state_index *)slots[i].tokens);
-      gl_constant_value *values =
-         &this->prog->Parameters->ParameterValues[index][0];
-
-      assert(this->uniforms < uniform_array_size);
-
-      for (unsigned j = 0; j < 4; j++)
-        stage_prog_data->param[this->uniforms * 4 + j] =
-            &values[GET_SWZ(slots[i].swizzle, j)];
-
-      this->uniform_vector_size[this->uniforms] =
-         (ir->type->is_scalar() || ir->type->is_vector() ||
-          ir->type->is_matrix() ? ir->type->vector_elements : 4);
-
-      this->uniforms++;
-   }
-}
-
-dst_reg *
-vec4_visitor::variable_storage(ir_variable *var)
-{
-   return (dst_reg *)hash_table_find(this->variable_ht, var);
-}
-
-void
-vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
-                                     enum brw_predicate *predicate)
-{
-   ir_expression *expr = ir->as_expression();
-
-   *predicate = BRW_PREDICATE_NORMAL;
-
-   if (expr && expr->operation != ir_binop_ubo_load) {
-      src_reg op[3];
-      vec4_instruction *inst;
-
-      assert(expr->get_num_operands() <= 3);
-      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-        expr->operands[i]->accept(this);
-        op[i] = this->result;
-
-        resolve_ud_negate(&op[i]);
-      }
-
-      switch (expr->operation) {
-      case ir_unop_logic_not:
-        inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
-        inst->conditional_mod = BRW_CONDITIONAL_Z;
-        break;
-
-      case ir_binop_logic_xor:
-         if (devinfo->gen <= 5) {
-            src_reg temp = src_reg(this, ir->type);
-            emit(XOR(dst_reg(temp), op[0], op[1]));
-            inst = emit(AND(dst_null_d(), temp, src_reg(1)));
-         } else {
-            inst = emit(XOR(dst_null_d(), op[0], op[1]));
-         }
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-        break;
-
-      case ir_binop_logic_or:
-         if (devinfo->gen <= 5) {
-            src_reg temp = src_reg(this, ir->type);
-            emit(OR(dst_reg(temp), op[0], op[1]));
-            inst = emit(AND(dst_null_d(), temp, src_reg(1)));
-         } else {
-            inst = emit(OR(dst_null_d(), op[0], op[1]));
-         }
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-        break;
-
-      case ir_binop_logic_and:
-         if (devinfo->gen <= 5) {
-            src_reg temp = src_reg(this, ir->type);
-            emit(AND(dst_reg(temp), op[0], op[1]));
-            inst = emit(AND(dst_null_d(), temp, src_reg(1)));
-         } else {
-            inst = emit(AND(dst_null_d(), op[0], op[1]));
-         }
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-        break;
-
-      case ir_unop_f2b:
-        if (devinfo->gen >= 6) {
-           emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-        } else {
-           inst = emit(MOV(dst_null_f(), op[0]));
-           inst->conditional_mod = BRW_CONDITIONAL_NZ;
-        }
-        break;
-
-      case ir_unop_i2b:
-        if (devinfo->gen >= 6) {
-           emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-        } else {
-           inst = emit(MOV(dst_null_d(), op[0]));
-           inst->conditional_mod = BRW_CONDITIONAL_NZ;
-        }
-        break;
-
-      case ir_binop_all_equal:
-         if (devinfo->gen <= 5) {
-            resolve_bool_comparison(expr->operands[0], &op[0]);
-            resolve_bool_comparison(expr->operands[1], &op[1]);
-         }
-        inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
-        *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
-        break;
-
-      case ir_binop_any_nequal:
-         if (devinfo->gen <= 5) {
-            resolve_bool_comparison(expr->operands[0], &op[0]);
-            resolve_bool_comparison(expr->operands[1], &op[1]);
-         }
-        inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
-        *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-        break;
-
-      case ir_unop_any:
-         if (devinfo->gen <= 5) {
-            resolve_bool_comparison(expr->operands[0], &op[0]);
-         }
-        inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-        *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-        break;
-
-      case ir_binop_greater:
-      case ir_binop_gequal:
-      case ir_binop_less:
-      case ir_binop_lequal:
-      case ir_binop_equal:
-      case ir_binop_nequal:
-         if (devinfo->gen <= 5) {
-            resolve_bool_comparison(expr->operands[0], &op[0]);
-            resolve_bool_comparison(expr->operands[1], &op[1]);
-         }
-        emit(CMP(dst_null_d(), op[0], op[1],
-                 brw_conditional_for_comparison(expr->operation)));
-        break;
-
-      case ir_triop_csel: {
-         /* Expand the boolean condition into the flag register. */
-         inst = emit(MOV(dst_null_d(), op[0]));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-         /* Select which boolean to return. */
-         dst_reg temp(this, expr->operands[1]->type);
-         inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         /* Expand the result to a condition code. */
-         inst = emit(MOV(dst_null_d(), src_reg(temp)));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-         break;
-      }
-
-      default:
-        unreachable("not reached");
-      }
-      return;
-   }
-
-   ir->accept(this);
-
-   resolve_ud_negate(&this->result);
-
-   vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1)));
-   inst->conditional_mod = BRW_CONDITIONAL_NZ;
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-vec4_visitor::emit_if_gen6(ir_if *ir)
-{
-   ir_expression *expr = ir->condition->as_expression();
-
-   if (expr && expr->operation != ir_binop_ubo_load) {
-      src_reg op[3];
-      dst_reg temp;
-
-      assert(expr->get_num_operands() <= 3);
-      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-        expr->operands[i]->accept(this);
-        op[i] = this->result;
-      }
-
-      switch (expr->operation) {
-      case ir_unop_logic_not:
-        emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
-        return;
-
-      case ir_binop_logic_xor:
-        emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
-        return;
-
-      case ir_binop_logic_or:
-        temp = dst_reg(this, glsl_type::bool_type);
-        emit(OR(temp, op[0], op[1]));
-        emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
-        return;
-
-      case ir_binop_logic_and:
-        temp = dst_reg(this, glsl_type::bool_type);
-        emit(AND(temp, op[0], op[1]));
-        emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
-        return;
-
-      case ir_unop_f2b:
-        emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-        return;
-
-      case ir_unop_i2b:
-        emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-        return;
-
-      case ir_binop_greater:
-      case ir_binop_gequal:
-      case ir_binop_less:
-      case ir_binop_lequal:
-      case ir_binop_equal:
-      case ir_binop_nequal:
-        emit(IF(op[0], op[1],
-                brw_conditional_for_comparison(expr->operation)));
-        return;
-
-      case ir_binop_all_equal:
-        emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
-        emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
-        return;
-
-      case ir_binop_any_nequal:
-        emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
-        emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
-        return;
-
-      case ir_unop_any:
-        emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-        emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
-        return;
-
-      case ir_triop_csel: {
-         /* Expand the boolean condition into the flag register. */
-         vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-         /* Select which boolean to return. */
-         dst_reg temp(this, expr->operands[1]->type);
-         inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
-         return;
-      }
-
-      default:
-        unreachable("not reached");
-      }
-      return;
-   }
-
-   ir->condition->accept(this);
-
-   emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
-}
-
-void
-vec4_visitor::visit(ir_variable *ir)
-{
-   dst_reg *reg = NULL;
-
-   if (variable_storage(ir))
-      return;
-
-   switch (ir->data.mode) {
-   case ir_var_shader_in:
-      assert(ir->data.location != -1);
-      reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
-      break;
-
-   case ir_var_shader_out:
-      assert(ir->data.location != -1);
-      reg = new(mem_ctx) dst_reg(this, ir->type);
-
-      for (int i = 0; i < type_size_vec4(ir->type); i++) {
-        output_reg[ir->data.location + i] = *reg;
-        output_reg[ir->data.location + i].reg_offset = i;
-        output_reg_annotation[ir->data.location + i] = ir->name;
-      }
-      break;
-
-   case ir_var_auto:
-   case ir_var_temporary:
-      reg = new(mem_ctx) dst_reg(this, ir->type);
-      break;
-
-   case ir_var_uniform:
-   case ir_var_shader_storage:
-      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
-
-      /* Thanks to the lower_ubo_reference pass, we will see only
-       * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
-       * for UBO/SSBO variables, so no need for them to be in variable_ht.
-       *
-       * Some uniforms, such as samplers and atomic counters, have no actual
-       * storage, so we should ignore them.
-       */
-      if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0)
-         return;
-
-      /* Track how big the whole uniform variable is, in case we need to put a
-       * copy of its data into pull constants for array access.
-       */
-      assert(this->uniforms < uniform_array_size);
-      this->uniform_size[this->uniforms] = type_size_vec4(ir->type);
-
-      if (!strncmp(ir->name, "gl_", 3)) {
-        setup_builtin_uniform_values(ir);
-      } else {
-        setup_uniform_values(ir);
-      }
-      break;
-
-   case ir_var_system_value:
-      reg = make_reg_for_system_value(ir->data.location, ir->type);
-      break;
-
-   default:
-      unreachable("not reached");
-   }
-
-   reg->type = brw_type_for_base_type(ir->type);
-   hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-vec4_visitor::visit(ir_loop *ir)
-{
-   /* We don't want debugging output to print the whole body of the
-    * loop as the annotation.
-    */
-   this->base_ir = NULL;
-
-   emit(BRW_OPCODE_DO);
-
-   visit_instructions(&ir->body_instructions);
-
-   emit(BRW_OPCODE_WHILE);
-}
-
-void
-vec4_visitor::visit(ir_loop_jump *ir)
-{
-   switch (ir->mode) {
-   case ir_loop_jump::jump_break:
-      emit(BRW_OPCODE_BREAK);
-      break;
-   case ir_loop_jump::jump_continue:
-      emit(BRW_OPCODE_CONTINUE);
-      break;
-   }
-}
-
-
-void
-vec4_visitor::visit(ir_function_signature *)
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_function *ir)
-{
-   /* Ignore function bodies other than main() -- we shouldn't see calls to
-    * them since they should all be inlined.
-    */
-   if (strcmp(ir->name, "main") == 0) {
-      const ir_function_signature *sig;
-      exec_list empty;
-
-      sig = ir->matching_signature(NULL, &empty, false);
-
-      assert(sig);
-
-      visit_instructions(&sig->body);
-   }
-}
-
-bool
-vec4_visitor::try_emit_mad(ir_expression *ir)
-{
-   /* 3-src instructions were introduced in gen6. */
-   if (devinfo->gen < 6)
-      return false;
-
-   /* MAD can only handle floating-point data. */
-   if (ir->type->base_type != GLSL_TYPE_FLOAT)
-      return false;
-
-   ir_rvalue *nonmul;
-   ir_expression *mul;
-   bool mul_negate, mul_abs;
-
-   for (int i = 0; i < 2; i++) {
-      mul_negate = false;
-      mul_abs = false;
-
-      mul = ir->operands[i]->as_expression();
-      nonmul = ir->operands[1 - i];
-
-      if (mul && mul->operation == ir_unop_abs) {
-         mul = mul->operands[0]->as_expression();
-         mul_abs = true;
-      } else if (mul && mul->operation == ir_unop_neg) {
-         mul = mul->operands[0]->as_expression();
-         mul_negate = true;
-      }
-
-      if (mul && mul->operation == ir_binop_mul)
-         break;
-   }
-
-   if (!mul || mul->operation != ir_binop_mul)
-      return false;
-
-   nonmul->accept(this);
-   src_reg src0 = fix_3src_operand(this->result);
-
-   mul->operands[0]->accept(this);
-   src_reg src1 = fix_3src_operand(this->result);
-   src1.negate ^= mul_negate;
-   src1.abs = mul_abs;
-   if (mul_abs)
-      src1.negate = false;
-
-   mul->operands[1]->accept(this);
-   src_reg src2 = fix_3src_operand(this->result);
-   src2.abs = mul_abs;
-   if (mul_abs)
-      src2.negate = false;
-
-   this->result = src_reg(this, ir->type);
-   emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
-
-   return true;
-}
-
-bool
-vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
-{
-   /* This optimization relies on CMP setting the destination to 0 when
-    * false.  Early hardware only sets the least significant bit, and
-    * leaves the other bits undefined.  So we can't use it.
-    */
-   if (devinfo->gen < 6)
-      return false;
-
-   ir_expression *const cmp = ir->operands[0]->as_expression();
-
-   if (cmp == NULL)
-      return false;
-
-   switch (cmp->operation) {
-   case ir_binop_less:
-   case ir_binop_greater:
-   case ir_binop_lequal:
-   case ir_binop_gequal:
-   case ir_binop_equal:
-   case ir_binop_nequal:
-      break;
-
-   default:
-      return false;
-   }
-
-   cmp->operands[0]->accept(this);
-   const src_reg cmp_src0 = this->result;
-
-   cmp->operands[1]->accept(this);
-   const src_reg cmp_src1 = this->result;
-
-   this->result = src_reg(this, ir->type);
-
-   emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
-            brw_conditional_for_comparison(cmp->operation)));
-
-   /* If the comparison is false, this->result will just happen to be zero.
-    */
-   vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
-                                       this->result, src_reg(1.0f));
-   inst->predicate = BRW_PREDICATE_NORMAL;
-   inst->predicate_inverse = true;
-
-   return true;
-}
-
-vec4_instruction *
-vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
-                          src_reg src0, src_reg src1)
-{
-   vec4_instruction *inst;
-
-   if (devinfo->gen >= 6) {
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->conditional_mod = conditionalmod;
-   } else {
-      emit(CMP(dst, src0, src1, conditionalmod));
-
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-
-   return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit_lrp(const dst_reg &dst,
-                       const src_reg &x, const src_reg &y, const src_reg &a)
-{
-   if (devinfo->gen >= 6) {
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-     return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
-                     fix_3src_operand(x)));
-   } else {
-      /* Earlier generations don't support three source operations, so we
-       * need to emit x*(1-a) + y*a.
-       */
-      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
-      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
-      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
-      y_times_a.writemask           = dst.writemask;
-      one_minus_a.writemask         = dst.writemask;
-      x_times_one_minus_a.writemask = dst.writemask;
-
-      emit(MUL(y_times_a, y, a));
-      emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
-      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
-      return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
-   }
-}
-
-/**
- * Emits the instructions needed to perform a pull constant load. before_block
- * and before_inst can be NULL in which case the instruction will be appended
- * to the end of the instruction list.
- */
-void
-vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
-                                          src_reg surf_index,
-                                          src_reg offset_reg,
-                                          bblock_t *before_block,
-                                          vec4_instruction *before_inst)
-{
-   assert((before_inst == NULL && before_block == NULL) ||
-          (before_inst && before_block));
-
-   vec4_instruction *pull;
-
-   if (devinfo->gen >= 9) {
-      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
-      src_reg header(this, glsl_type::uvec4_type, 2);
-
-      pull = new(mem_ctx)
-         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
-                          dst_reg(header));
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      dst_reg index_reg = retype(offset(dst_reg(header), 1),
-                                 offset_reg.type);
-      pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
-                                           dst,
-                                           surf_index,
-                                           header);
-      pull->mlen = 2;
-      pull->header_size = 1;
-   } else if (devinfo->gen >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
-
-      grf_offset.type = offset_reg.type;
-
-      pull = MOV(grf_offset, offset_reg);
-
-      if (before_inst)
-         emit_before(before_block, before_inst, pull);
-      else
-         emit(pull);
-
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
-                                           dst,
-                                           surf_index,
-                                           src_reg(grf_offset));
-      pull->mlen = 1;
-   } else {
-      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
-                                           dst,
-                                           surf_index,
-                                           offset_reg);
-      pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
-      pull->mlen = 1;
-   }
-
-   if (before_inst)
-      emit_before(before_block, before_inst, pull);
-   else
-      emit(pull);
-}
-
-src_reg
-vec4_visitor::emit_uniformize(const src_reg &src)
-{
-   const src_reg chan_index(this, glsl_type::uint_type);
-   const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
-                              src.type);
-
-   emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
-      ->force_writemask_all = true;
-   emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
-      ->force_writemask_all = true;
-
-   return src_reg(dst);
-}
-
-void
-vec4_visitor::visit(ir_expression *ir)
-{
-   unsigned int operand;
-   src_reg op[ARRAY_SIZE(ir->operands)];
-   vec4_instruction *inst;
-
-   if (ir->operation == ir_binop_add) {
-      if (try_emit_mad(ir))
-        return;
-   }
-
-   if (ir->operation == ir_unop_b2f) {
-      if (try_emit_b2f_of_compare(ir))
-        return;
-   }
-
-   /* Storage for our result.  Ideally for an assignment we'd be using
-    * the actual storage for the result here, instead.
-    */
-   dst_reg result_dst(this, ir->type);
-   src_reg result_src(result_dst);
-
-   if (ir->operation == ir_triop_csel) {
-      ir->operands[1]->accept(this);
-      op[1] = this->result;
-      ir->operands[2]->accept(this);
-      op[2] = this->result;
-
-      enum brw_predicate predicate;
-      emit_bool_to_cond_code(ir->operands[0], &predicate);
-      inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
-      inst->predicate = predicate;
-      this->result = result_src;
-      return;
-   }
-
-   for (operand = 0; operand < ir->get_num_operands(); operand++) {
-      this->result.file = BAD_FILE;
-      ir->operands[operand]->accept(this);
-      if (this->result.file == BAD_FILE) {
-        fprintf(stderr, "Failed to get tree for expression operand:\n");
-        ir->operands[operand]->fprint(stderr);
-        exit(1);
-      }
-      op[operand] = this->result;
-
-      /* Matrix expression operands should have been broken down to vector
-       * operations already.
-       */
-      assert(!ir->operands[operand]->type->is_matrix());
-   }
-
-   /* If nothing special happens, this is the result. */
-   this->result = result_src;
-
-   switch (ir->operation) {
-   case ir_unop_logic_not:
-      emit(NOT(result_dst, op[0]));
-      break;
-   case ir_unop_neg:
-      op[0].negate = !op[0].negate;
-      emit(MOV(result_dst, op[0]));
-      break;
-   case ir_unop_abs:
-      op[0].abs = true;
-      op[0].negate = false;
-      emit(MOV(result_dst, op[0]));
-      break;
-
-   case ir_unop_sign:
-      if (ir->type->is_float()) {
-         /* AND(val, 0x80000000) gives the sign bit.
-          *
-          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
-          * zero.
-          */
-         emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-
-         op[0].type = BRW_REGISTER_TYPE_UD;
-         result_dst.type = BRW_REGISTER_TYPE_UD;
-         emit(AND(result_dst, op[0], src_reg(0x80000000u)));
-
-         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         this->result.type = BRW_REGISTER_TYPE_F;
-      } else {
-         /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
-          *               -> non-negative val generates 0x00000000.
-          *  Predicated OR sets 1 if val is positive.
-          */
-         emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
-
-         emit(ASR(result_dst, op[0], src_reg(31)));
-
-         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
-      break;
-
-   case ir_unop_rcp:
-      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
-      break;
-
-   case ir_unop_exp2:
-      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
-      break;
-   case ir_unop_log2:
-      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
-      break;
-   case ir_unop_exp:
-   case ir_unop_log:
-      unreachable("not reached: should be handled by ir_explog_to_explog2");
-   case ir_unop_sin:
-      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
-      break;
-   case ir_unop_cos:
-      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
-      break;
-
-   case ir_unop_dFdx:
-   case ir_unop_dFdx_coarse:
-   case ir_unop_dFdx_fine:
-   case ir_unop_dFdy:
-   case ir_unop_dFdy_coarse:
-   case ir_unop_dFdy_fine:
-      unreachable("derivatives not valid in vertex shader");
-
-   case ir_unop_bitfield_reverse:
-      emit(BFREV(result_dst, op[0]));
-      break;
-   case ir_unop_bit_count:
-      emit(CBIT(result_dst, op[0]));
-      break;
-   case ir_unop_find_msb: {
-      src_reg temp = src_reg(this, glsl_type::uint_type);
-
-      inst = emit(FBH(dst_reg(temp), op[0]));
-      inst->dst.writemask = WRITEMASK_XYZW;
-
-      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
-       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
-       * subtract the result from 31 to convert the MSB count into an LSB count.
-       */
-
-      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
-      temp.swizzle = BRW_SWIZZLE_NOOP;
-      emit(MOV(result_dst, temp));
-
-      src_reg src_tmp = src_reg(result_dst);
-      emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
-
-      src_tmp.negate = true;
-      inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      break;
-   }
-   case ir_unop_find_lsb:
-      emit(FBL(result_dst, op[0]));
-      break;
-   case ir_unop_saturate:
-      inst = emit(MOV(result_dst, op[0]));
-      inst->saturate = true;
-      break;
-
-   case ir_unop_noise:
-      unreachable("not reached: should be handled by lower_noise");
-
-   case ir_unop_subroutine_to_int:
-      emit(MOV(result_dst, op[0]));
-      break;
-
-   case ir_unop_ssbo_unsized_array_length:
-      unreachable("not reached: should be handled by lower_ubo_reference");
-      break;
-
-   case ir_binop_add:
-      emit(ADD(result_dst, op[0], op[1]));
-      break;
-   case ir_binop_sub:
-      unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
-   case ir_binop_mul:
-      if (devinfo->gen < 8 && ir->type->is_integer()) {
-        /* For integer multiplication, the MUL uses the low 16 bits of one of
-         * the operands (src0 through SNB, src1 on IVB and later).  The MACH
-         * accumulates in the contribution of the upper 16 bits of that
-         * operand.  If we can determine that one of the args is in the low
-         * 16 bits, though, we can just emit a single MUL.
-          */
-         if (ir->operands[0]->is_uint16_constant()) {
-            if (devinfo->gen < 7)
-               emit(MUL(result_dst, op[0], op[1]));
-            else
-               emit(MUL(result_dst, op[1], op[0]));
-         } else if (ir->operands[1]->is_uint16_constant()) {
-            if (devinfo->gen < 7)
-               emit(MUL(result_dst, op[1], op[0]));
-            else
-               emit(MUL(result_dst, op[0], op[1]));
-         } else {
-            struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
-            emit(MUL(acc, op[0], op[1]));
-            emit(MACH(dst_null_d(), op[0], op[1]));
-            emit(MOV(result_dst, src_reg(acc)));
-         }
-      } else {
-        emit(MUL(result_dst, op[0], op[1]));
-      }
-      break;
-   case ir_binop_imul_high: {
-      struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
-      emit(MUL(acc, op[0], op[1]));
-      emit(MACH(result_dst, op[0], op[1]));
-      break;
-   }
-   case ir_binop_div:
-      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
-      assert(ir->type->is_integer());
-      emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
-      break;
-
-   case ir_binop_carry:
-      unreachable("Should have been lowered by carry_to_arith().");
-
-   case ir_binop_borrow:
-      unreachable("Should have been lowered by borrow_to_arith().");
-
-   case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
-      assert(ir->type->is_integer());
-      emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
-      break;
-
-   case ir_binop_less:
-   case ir_binop_greater:
-   case ir_binop_lequal:
-   case ir_binop_gequal:
-   case ir_binop_equal:
-   case ir_binop_nequal: {
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-         resolve_bool_comparison(ir->operands[1], &op[1]);
-      }
-      emit(CMP(result_dst, op[0], op[1],
-              brw_conditional_for_comparison(ir->operation)));
-      break;
-   }
-
-   case ir_binop_all_equal:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-         resolve_bool_comparison(ir->operands[1], &op[1]);
-      }
-
-      /* "==" operator producing a scalar boolean. */
-      if (ir->operands[0]->type->is_vector() ||
-         ir->operands[1]->type->is_vector()) {
-        emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
-        emit(MOV(result_dst, src_reg(0)));
-         inst = emit(MOV(result_dst, src_reg(~0)));
-        inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
-      } else {
-        emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
-      }
-      break;
-   case ir_binop_any_nequal:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-         resolve_bool_comparison(ir->operands[1], &op[1]);
-      }
-
-      /* "!=" operator producing a scalar boolean. */
-      if (ir->operands[0]->type->is_vector() ||
-         ir->operands[1]->type->is_vector()) {
-        emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
-
-        emit(MOV(result_dst, src_reg(0)));
-         inst = emit(MOV(result_dst, src_reg(~0)));
-        inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-      } else {
-        emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
-      }
-      break;
-
-   case ir_unop_any:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-      }
-      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-      emit(MOV(result_dst, src_reg(0)));
-
-      inst = emit(MOV(result_dst, src_reg(~0)));
-      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
-      break;
-
-   case ir_binop_logic_xor:
-      emit(XOR(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_logic_or:
-      emit(OR(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_logic_and:
-      emit(AND(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_dot:
-      assert(ir->operands[0]->type->is_vector());
-      assert(ir->operands[0]->type == ir->operands[1]->type);
-      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
-      break;
-
-   case ir_unop_sqrt:
-      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
-      break;
-   case ir_unop_rsq:
-      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
-      break;
-
-   case ir_unop_bitcast_i2f:
-   case ir_unop_bitcast_u2f:
-      this->result = op[0];
-      this->result.type = BRW_REGISTER_TYPE_F;
-      break;
-
-   case ir_unop_bitcast_f2i:
-      this->result = op[0];
-      this->result.type = BRW_REGISTER_TYPE_D;
-      break;
-
-   case ir_unop_bitcast_f2u:
-      this->result = op[0];
-      this->result.type = BRW_REGISTER_TYPE_UD;
-      break;
-
-   case ir_unop_i2f:
-   case ir_unop_i2u:
-   case ir_unop_u2i:
-   case ir_unop_u2f:
-   case ir_unop_f2i:
-   case ir_unop_f2u:
-      emit(MOV(result_dst, op[0]));
-      break;
-   case ir_unop_b2i:
-   case ir_unop_b2f:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-      }
-      emit(MOV(result_dst, negate(op[0])));
-      break;
-   case ir_unop_f2b:
-      emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-      break;
-   case ir_unop_i2b:
-      emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
-      break;
-
-   case ir_unop_trunc:
-      emit(RNDZ(result_dst, op[0]));
-      break;
-   case ir_unop_ceil: {
-         src_reg tmp = src_reg(this, ir->type);
-         op[0].negate = !op[0].negate;
-         emit(RNDD(dst_reg(tmp), op[0]));
-         tmp.negate = true;
-         emit(MOV(result_dst, tmp));
-      }
-      break;
-   case ir_unop_floor:
-      inst = emit(RNDD(result_dst, op[0]));
-      break;
-   case ir_unop_fract:
-      inst = emit(FRC(result_dst, op[0]));
-      break;
-   case ir_unop_round_even:
-      emit(RNDE(result_dst, op[0]));
-      break;
-
-   case ir_unop_get_buffer_size:
-      unreachable("not reached: not implemented");
-      break;
-
-   case ir_binop_min:
-      emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
-      break;
-   case ir_binop_max:
-      emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]);
-      break;
-
-   case ir_binop_pow:
-      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
-      break;
-
-   case ir_unop_bit_not:
-      inst = emit(NOT(result_dst, op[0]));
-      break;
-   case ir_binop_bit_and:
-      inst = emit(AND(result_dst, op[0], op[1]));
-      break;
-   case ir_binop_bit_xor:
-      inst = emit(XOR(result_dst, op[0], op[1]));
-      break;
-   case ir_binop_bit_or:
-      inst = emit(OR(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_lshift:
-      inst = emit(SHL(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_rshift:
-      if (ir->type->base_type == GLSL_TYPE_INT)
-         inst = emit(ASR(result_dst, op[0], op[1]));
-      else
-         inst = emit(SHR(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_bfm:
-      emit(BFI1(result_dst, op[0], op[1]));
-      break;
-
-   case ir_binop_ubo_load: {
-      ir_constant *const_uniform_block = ir->operands[0]->as_constant();
-      ir_constant *const_offset_ir = ir->operands[1]->as_constant();
-      unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
-      src_reg offset;
-
-      /* Now, load the vector from that offset. */
-      assert(ir->type->is_vector() || ir->type->is_scalar());
-
-      src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
-      packed_consts.type = result.type;
-      src_reg surf_index;
-
-      if (const_uniform_block) {
-         /* The block index is a constant, so just emit the binding table entry
-          * as an immediate.
-          */
-         surf_index = src_reg(prog_data->base.binding_table.ubo_start +
-                              const_uniform_block->value.u[0]);
-      } else {
-         /* The block index is not a constant. Evaluate the index expression
-          * per-channel and add the base UBO index; we have to select a value
-          * from any live channel.
-          */
-         surf_index = src_reg(this, glsl_type::uint_type);
-         emit(ADD(dst_reg(surf_index), op[0],
-                  src_reg(prog_data->base.binding_table.ubo_start)));
-         surf_index = emit_uniformize(surf_index);
-
-         /* Assume this may touch any UBO. It would be nice to provide
-          * a tighter bound, but the array information is already lowered away.
-          */
-         brw_mark_surface_used(&prog_data->base,
-                               prog_data->base.binding_table.ubo_start +
-                               shader_prog->NumBufferInterfaceBlocks - 1);
-      }
-
-      if (const_offset_ir) {
-         if (devinfo->gen >= 8) {
-            /* Store the offset in a GRF so we can send-from-GRF. */
-            offset = src_reg(this, glsl_type::int_type);
-            emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
-         } else {
-            /* Immediates are fine on older generations since they'll be moved
-             * to a (potentially fake) MRF at the generator level.
-             */
-            offset = src_reg(const_offset / 16);
-         }
-      } else {
-         offset = src_reg(this, glsl_type::uint_type);
-         emit(SHR(dst_reg(offset), op[1], src_reg(4u)));
-      }
-
-      emit_pull_constant_load_reg(dst_reg(packed_consts),
-                                  surf_index,
-                                  offset,
-                                  NULL, NULL /* before_block/inst */);
-
-      packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
-      packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
-                                            const_offset % 16 / 4,
-                                            const_offset % 16 / 4,
-                                            const_offset % 16 / 4);
-
-      /* UBO bools are any nonzero int.  We need to convert them to 0/~0. */
-      if (ir->type->base_type == GLSL_TYPE_BOOL) {
-         emit(CMP(result_dst, packed_consts, src_reg(0u),
-                  BRW_CONDITIONAL_NZ));
-      } else {
-         emit(MOV(result_dst, packed_consts));
-      }
-      break;
-   }
-
-   case ir_binop_vector_extract:
-      unreachable("should have been lowered by vec_index_to_cond_assign");
-
-   case ir_triop_fma:
-      op[0] = fix_3src_operand(op[0]);
-      op[1] = fix_3src_operand(op[1]);
-      op[2] = fix_3src_operand(op[2]);
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-      emit(MAD(result_dst, op[2], op[1], op[0]));
-      break;
-
-   case ir_triop_lrp:
-      emit_lrp(result_dst, op[0], op[1], op[2]);
-      break;
-
-   case ir_triop_csel:
-      unreachable("already handled above");
-      break;
-
-   case ir_triop_bfi:
-      op[0] = fix_3src_operand(op[0]);
-      op[1] = fix_3src_operand(op[1]);
-      op[2] = fix_3src_operand(op[2]);
-      emit(BFI2(result_dst, op[0], op[1], op[2]));
-      break;
-
-   case ir_triop_bitfield_extract:
-      op[0] = fix_3src_operand(op[0]);
-      op[1] = fix_3src_operand(op[1]);
-      op[2] = fix_3src_operand(op[2]);
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-      emit(BFE(result_dst, op[2], op[1], op[0]));
-      break;
-
-   case ir_triop_vector_insert:
-      unreachable("should have been lowered by lower_vector_insert");
-
-   case ir_quadop_bitfield_insert:
-      unreachable("not reached: should be handled by "
-              "bitfield_insert_to_bfm_bfi\n");
-
-   case ir_quadop_vector:
-      unreachable("not reached: should be handled by lower_quadop_vector");
-
-   case ir_unop_pack_half_2x16:
-      emit_pack_half_2x16(result_dst, op[0]);
-      break;
-   case ir_unop_unpack_half_2x16:
-      emit_unpack_half_2x16(result_dst, op[0]);
-      break;
-   case ir_unop_unpack_unorm_4x8:
-      emit_unpack_unorm_4x8(result_dst, op[0]);
-      break;
-   case ir_unop_unpack_snorm_4x8:
-      emit_unpack_snorm_4x8(result_dst, op[0]);
-      break;
-   case ir_unop_pack_unorm_4x8:
-      emit_pack_unorm_4x8(result_dst, op[0]);
-      break;
-   case ir_unop_pack_snorm_4x8:
-      emit_pack_snorm_4x8(result_dst, op[0]);
-      break;
-   case ir_unop_pack_snorm_2x16:
-   case ir_unop_pack_unorm_2x16:
-   case ir_unop_unpack_snorm_2x16:
-   case ir_unop_unpack_unorm_2x16:
-      unreachable("not reached: should be handled by lower_packing_builtins");
-   case ir_unop_unpack_half_2x16_split_x:
-   case ir_unop_unpack_half_2x16_split_y:
-   case ir_binop_pack_half_2x16_split:
-   case ir_unop_interpolate_at_centroid:
-   case ir_binop_interpolate_at_sample:
-   case ir_binop_interpolate_at_offset:
-      unreachable("not reached: should not occur in vertex shader");
-   case ir_binop_ldexp:
-      unreachable("not reached: should be handled by ldexp_to_arith()");
-   case ir_unop_d2f:
-   case ir_unop_f2d:
-   case ir_unop_d2i:
-   case ir_unop_i2d:
-   case ir_unop_d2u:
-   case ir_unop_u2d:
-   case ir_unop_d2b:
-   case ir_unop_pack_double_2x32:
-   case ir_unop_unpack_double_2x32:
-   case ir_unop_frexp_sig:
-   case ir_unop_frexp_exp:
-      unreachable("fp64 todo");
-   }
-}
-
-
-void
-vec4_visitor::visit(ir_swizzle *ir)
-{
-   /* Note that this is only swizzles in expressions, not those on the left
-    * hand side of an assignment, which do write masking.  See ir_assignment
-    * for that.
-    */
-   const unsigned swz = brw_compose_swizzle(
-      brw_swizzle_for_size(ir->type->vector_elements),
-      BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w));
-
-   ir->val->accept(this);
-   this->result = swizzle(this->result, swz);
-}
-
-void
-vec4_visitor::visit(ir_dereference_variable *ir)
-{
-   const struct glsl_type *type = ir->type;
-   dst_reg *reg = variable_storage(ir->var);
-
-   if (!reg) {
-      fail("Failed to find variable storage for %s\n", ir->var->name);
-      this->result = src_reg(brw_null_reg());
-      return;
-   }
-
-   this->result = src_reg(*reg);
-
-   /* System values get their swizzle from the dst_reg writemask */
-   if (ir->var->data.mode == ir_var_system_value)
-      return;
-
-   if (type->is_scalar() || type->is_vector() || type->is_matrix())
-      this->result.swizzle = brw_swizzle_for_size(type->vector_elements);
-}
-
-
-int
-vec4_visitor::compute_array_stride(ir_dereference_array *ir)
-{
-   /* Under normal circumstances array elements are stored consecutively, so
-    * the stride is equal to the size of the array element.
-    */
-   return type_size_vec4(ir->type);
-}
-
-
-void
-vec4_visitor::visit(ir_dereference_array *ir)
-{
-   ir_constant *constant_index;
-   src_reg src;
-   int array_stride = compute_array_stride(ir);
-
-   constant_index = ir->array_index->constant_expression_value();
-
-   ir->array->accept(this);
-   src = this->result;
-
-   if (constant_index) {
-      src.reg_offset += constant_index->value.i[0] * array_stride;
-   } else {
-      /* Variable index array dereference.  It eats the "vec4" of the
-       * base of the array and an index that offsets the Mesa register
-       * index.
-       */
-      ir->array_index->accept(this);
-
-      src_reg index_reg;
-
-      if (array_stride == 1) {
-        index_reg = this->result;
-      } else {
-        index_reg = src_reg(this, glsl_type::int_type);
-
-        emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
-      }
-
-      if (src.reladdr) {
-        src_reg temp = src_reg(this, glsl_type::int_type);
-
-        emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
-
-        index_reg = temp;
-      }
-
-      src.reladdr = ralloc(mem_ctx, src_reg);
-      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
-   }
-
-   /* If the type is smaller than a vec4, replicate the last channel out. */
-   if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
-      src.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
-   else
-      src.swizzle = BRW_SWIZZLE_NOOP;
-   src.type = brw_type_for_base_type(ir->type);
-
-   this->result = src;
-}
-
-void
-vec4_visitor::visit(ir_dereference_record *ir)
-{
-   unsigned int i;
-   const glsl_type *struct_type = ir->record->type;
-   int offset = 0;
-
-   ir->record->accept(this);
-
-   for (i = 0; i < struct_type->length; i++) {
-      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
-        break;
-      offset += type_size_vec4(struct_type->fields.structure[i].type);
-   }
-
-   /* If the type is smaller than a vec4, replicate the last channel out. */
-   if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
-      this->result.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
-   else
-      this->result.swizzle = BRW_SWIZZLE_NOOP;
-   this->result.type = brw_type_for_base_type(ir->type);
-
-   this->result.reg_offset += offset;
-}
-
-/**
- * We want to be careful in assignment setup to hit the actual storage
- * instead of potentially using a temporary like we might with the
- * ir_dereference handler.
- */
-static dst_reg
-get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
-{
-   /* The LHS must be a dereference.  If the LHS is a variable indexed array
-    * access of a vector, it must be separated into a series conditional moves
-    * before reaching this point (see ir_vec_index_to_cond_assign).
-    */
-   assert(ir->as_dereference());
-   ir_dereference_array *deref_array = ir->as_dereference_array();
-   if (deref_array) {
-      assert(!deref_array->array->type->is_vector());
-   }
-
-   /* Use the rvalue deref handler for the most part.  We'll ignore
-    * swizzles in it and write swizzles using writemask, though.
-    */
-   ir->accept(v);
-   return dst_reg(v->result);
-}
-
-void
-vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
-                              const struct glsl_type *type,
-                              enum brw_predicate predicate)
-{
-   if (type->base_type == GLSL_TYPE_STRUCT) {
-      for (unsigned int i = 0; i < type->length; i++) {
-        emit_block_move(dst, src, type->fields.structure[i].type, predicate);
-      }
-      return;
-   }
-
-   if (type->is_array()) {
-      for (unsigned int i = 0; i < type->length; i++) {
-        emit_block_move(dst, src, type->fields.array, predicate);
-      }
-      return;
-   }
-
-   if (type->is_matrix()) {
-      const struct glsl_type *vec_type;
-
-      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
-                                        type->vector_elements, 1);
-
-      for (int i = 0; i < type->matrix_columns; i++) {
-        emit_block_move(dst, src, vec_type, predicate);
-      }
-      return;
-   }
-
-   assert(type->is_scalar() || type->is_vector());
-
-   dst->type = brw_type_for_base_type(type);
-   src->type = dst->type;
-
-   dst->writemask = (1 << type->vector_elements) - 1;
-
-   src->swizzle = brw_swizzle_for_size(type->vector_elements);
-
-   vec4_instruction *inst = emit(MOV(*dst, *src));
-   inst->predicate = predicate;
-
-   dst->reg_offset++;
-   src->reg_offset++;
-}
-
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so.  This ends
- * up reliably removing instructions where it can be tricky to do so
- * later without real UD chain information.
- */
-bool
-vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
-                                    dst_reg dst,
-                                    src_reg src,
-                                    vec4_instruction *pre_rhs_inst,
-                                    vec4_instruction *last_rhs_inst)
-{
-   /* This could be supported, but it would take more smarts. */
-   if (ir->condition)
-      return false;
-
-   if (pre_rhs_inst == last_rhs_inst)
-      return false; /* No instructions generated to work with. */
-
-   /* Make sure the last instruction generated our source reg. */
-   if (src.file != GRF ||
-       src.file != last_rhs_inst->dst.file ||
-       src.reg != last_rhs_inst->dst.reg ||
-       src.reg_offset != last_rhs_inst->dst.reg_offset ||
-       src.reladdr ||
-       src.abs ||
-       src.negate ||
-       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
-      return false;
-
-   /* Check that that last instruction fully initialized the channels
-    * we want to use, in the order we want to use them.  We could
-    * potentially reswizzle the operands of many instructions so that
-    * we could handle out of order channels, but don't yet.
-    */
-
-   for (unsigned i = 0; i < 4; i++) {
-      if (dst.writemask & (1 << i)) {
-        if (!(last_rhs_inst->dst.writemask & (1 << i)))
-           return false;
-
-        if (BRW_GET_SWZ(src.swizzle, i) != i)
-           return false;
-      }
-   }
-
-   /* Success!  Rewrite the instruction. */
-   last_rhs_inst->dst.file = dst.file;
-   last_rhs_inst->dst.reg = dst.reg;
-   last_rhs_inst->dst.reg_offset = dst.reg_offset;
-   last_rhs_inst->dst.reladdr = dst.reladdr;
-   last_rhs_inst->dst.writemask &= dst.writemask;
-
-   return true;
-}
-
-void
-vec4_visitor::visit(ir_assignment *ir)
-{
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
-   enum brw_predicate predicate = BRW_PREDICATE_NONE;
-
-   if (!ir->lhs->type->is_scalar() &&
-       !ir->lhs->type->is_vector()) {
-      ir->rhs->accept(this);
-      src_reg src = this->result;
-
-      if (ir->condition) {
-        emit_bool_to_cond_code(ir->condition, &predicate);
-      }
-
-      /* emit_block_move doesn't account for swizzles in the source register.
-       * This should be ok, since the source register is a structure or an
-       * array, and those can't be swizzled.  But double-check to be sure.
-       */
-      assert(src.swizzle ==
-             (ir->rhs->type->is_matrix()
-              ? brw_swizzle_for_size(ir->rhs->type->vector_elements)
-              : BRW_SWIZZLE_NOOP));
-
-      emit_block_move(&dst, &src, ir->rhs->type, predicate);
-      return;
-   }
-
-   /* Now we're down to just a scalar/vector with writemasks. */
-   int i;
-
-   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
-   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+}
  
-   ir->rhs->accept(this);
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   init();
  
-   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+   this->file = GRF;
+   this->reg = v->alloc.allocate(type_size_vec4(type));
  
-   int swizzles[4];
-   int src_chan = 0;
+   if (type->is_array() || type->is_record()) {
+      this->writemask = WRITEMASK_XYZW;
+   } else {
+      this->writemask = (1 << type->vector_elements) - 1;
+   }
  
-   assert(ir->lhs->type->is_vector() ||
-         ir->lhs->type->is_scalar());
-   dst.writemask = ir->write_mask;
+   this->type = brw_type_for_base_type(type);
+}
  
-   /* Swizzle a small RHS vector into the channels being written.
-    *
-    * glsl ir treats write_mask as dictating how many channels are
-    * present on the RHS while in our instructions we need to make
-    * those channels appear in the slots of the vec4 they're written to.
-    */
-   for (int i = 0; i < 4; i++)
-      swizzles[i] = (ir->write_mask & (1 << i) ? src_chan++ : 0);
+vec4_instruction *
+vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
+                          src_reg src0, src_reg src1)
+{
+   vec4_instruction *inst;
  
-   src_reg src = swizzle(this->result,
-                         BRW_SWIZZLE4(swizzles[0], swizzles[1],
-                                      swizzles[2], swizzles[3]));
+   if (devinfo->gen >= 6) {
+      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+      inst->conditional_mod = conditionalmod;
+   } else {
+      emit(CMP(dst, src0, src1, conditionalmod));
  
-   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
-      return;
+      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+      inst->predicate = BRW_PREDICATE_NORMAL;
     }
  
-   if (ir->condition) {
-      emit_bool_to_cond_code(ir->condition, &predicate);
-   }
+   return inst;
+}
  
-   for (i = 0; i < type_size_vec4(ir->lhs->type); i++) {
-      vec4_instruction *inst = emit(MOV(dst, src));
-      inst->predicate = predicate;
+vec4_instruction *
+vec4_visitor::emit_lrp(const dst_reg &dst,
+                       const src_reg &x, const src_reg &y, const src_reg &a)
+{
+   if (devinfo->gen >= 6) {
+      /* Note that the instruction's argument order is reversed from GLSL
+       * and the IR.
+       */
+     return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
+                     fix_3src_operand(x)));
+   } else {
+      /* Earlier generations don't support three source operations, so we
+       * need to emit x*(1-a) + y*a.
+       */
+      dst_reg y_times_a           = dst_reg(this, glsl_type::vec4_type);
+      dst_reg one_minus_a         = dst_reg(this, glsl_type::vec4_type);
+      dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
+      y_times_a.writemask           = dst.writemask;
+      one_minus_a.writemask         = dst.writemask;
+      x_times_one_minus_a.writemask = dst.writemask;
  
-      dst.reg_offset++;
-      src.reg_offset++;
+      emit(MUL(y_times_a, y, a));
+      emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
+      emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
+      return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
     }
  }
  
+/**
+ * Emits the instructions needed to perform a pull constant load. before_block
+ * and before_inst can be NULL in which case the instruction will be appended
+ * to the end of the instruction list.
+ */
  void
-vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
+vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
+                                          src_reg surf_index,
+                                          src_reg offset_reg,
+                                          bblock_t *before_block,
+                                          vec4_instruction *before_inst)
  {
-   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
-      foreach_in_list(ir_constant, field_value, &ir->components) {
-        emit_constant_values(dst, field_value);
-      }
-      return;
-   }
-
-   if (ir->type->is_array()) {
-      for (unsigned int i = 0; i < ir->type->length; i++) {
-        emit_constant_values(dst, ir->array_elements[i]);
-      }
-      return;
-   }
-
-   if (ir->type->is_matrix()) {
-      for (int i = 0; i < ir->type->matrix_columns; i++) {
-        float *vec = &ir->value.f[i * ir->type->vector_elements];
+   assert((before_inst == NULL && before_block == NULL) ||
+          (before_inst && before_block));
  
-        for (int j = 0; j < ir->type->vector_elements; j++) {
-           dst->writemask = 1 << j;
-           dst->type = BRW_REGISTER_TYPE_F;
+   vec4_instruction *pull;
  
-           emit(MOV(*dst, src_reg(vec[j])));
-        }
-        dst->reg_offset++;
-      }
-      return;
-   }
+   if (devinfo->gen >= 9) {
+      /* Gen9+ needs a message header in order to use SIMD4x2 mode */
+      src_reg header(this, glsl_type::uvec4_type, 2);
  
-   int remaining_writemask = (1 << ir->type->vector_elements) - 1;
+      pull = new(mem_ctx)
+         vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+                          dst_reg(header));
  
-   for (int i = 0; i < ir->type->vector_elements; i++) {
-      if (!(remaining_writemask & (1 << i)))
-        continue;
+      if (before_inst)
+         emit_before(before_block, before_inst, pull);
+      else
+         emit(pull);
  
-      dst->writemask = 1 << i;
-      dst->type = brw_type_for_base_type(ir->type);
+      dst_reg index_reg = retype(offset(dst_reg(header), 1),
+                                 offset_reg.type);
+      pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
  
-      /* Find other components that match the one we're about to
-       * write.  Emits fewer instructions for things like vec4(0.5,
-       * 1.5, 1.5, 1.5).
-       */
-      for (int j = i + 1; j < ir->type->vector_elements; j++) {
-        if (ir->type->base_type == GLSL_TYPE_BOOL) {
-           if (ir->value.b[i] == ir->value.b[j])
-              dst->writemask |= (1 << j);
-        } else {
-           /* u, i, and f storage all line up, so no need for a
-            * switch case for comparing each type.
-            */
-           if (ir->value.u[i] == ir->value.u[j])
-              dst->writemask |= (1 << j);
-        }
-      }
+      if (before_inst)
+         emit_before(before_block, before_inst, pull);
+      else
+         emit(pull);
  
-      switch (ir->type->base_type) {
-      case GLSL_TYPE_FLOAT:
-        emit(MOV(*dst, src_reg(ir->value.f[i])));
-        break;
-      case GLSL_TYPE_INT:
-        emit(MOV(*dst, src_reg(ir->value.i[i])));
-        break;
-      case GLSL_TYPE_UINT:
-        emit(MOV(*dst, src_reg(ir->value.u[i])));
-        break;
-      case GLSL_TYPE_BOOL:
-         emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? ~0 : 0)));
-        break;
-      default:
-        unreachable("Non-float/uint/int/bool constant");
-      }
+      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+                                           dst,
+                                           surf_index,
+                                           header);
+      pull->mlen = 2;
+      pull->header_size = 1;
+   } else if (devinfo->gen >= 7) {
+      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
  
-      remaining_writemask &= ~dst->writemask;
-   }
-   dst->reg_offset++;
-}
+      grf_offset.type = offset_reg.type;
  
-void
-vec4_visitor::visit(ir_constant *ir)
-{
-   dst_reg dst = dst_reg(this, ir->type);
-   this->result = src_reg(dst);
+      pull = MOV(grf_offset, offset_reg);
  
-   emit_constant_values(&dst, ir);
-}
+      if (before_inst)
+         emit_before(before_block, before_inst, pull);
+      else
+         emit(pull);
  
-void
-vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
-   ir_dereference *deref = static_cast<ir_dereference *>(
-      ir->actual_parameters.get_head());
-   ir_variable *location = deref->variable_referenced();
-   unsigned surf_index = (prog_data->base.binding_table.abo_start +
-                          location->data.binding);
-
-   /* Calculate the surface offset */
-   src_reg offset(this, glsl_type::uint_type);
-   ir_dereference_array *deref_array = deref->as_dereference_array();
-   if (deref_array) {
-      deref_array->array_index->accept(this);
-
-      src_reg tmp(this, glsl_type::uint_type);
-      emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
-      emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
+      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+                                           dst,
+                                           surf_index,
+                                           src_reg(grf_offset));
+      pull->mlen = 1;
     } else {
-      offset = location->data.atomic.offset;
-   }
-
-   /* Emit the appropriate machine instruction */
-   const char *callee = ir->callee->function_name();
-   dst_reg dst = get_assignment_lhs(ir->return_deref, this);
-
-   if (!strcmp("__intrinsic_atomic_read", callee)) {
-      emit_untyped_surface_read(surf_index, dst, offset);
-
-   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
-      emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
-                          src_reg(), src_reg());
-
-   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
-      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
-                          src_reg(), src_reg());
+      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
+                                           dst,
+                                           surf_index,
+                                           offset_reg);
+      pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
+      pull->mlen = 1;
     }
  
-   brw_mark_surface_used(stage_prog_data, surf_index);
+   if (before_inst)
+      emit_before(before_block, before_inst, pull);
+   else
+      emit(pull);
  }
  
-void
-vec4_visitor::visit(ir_call *ir)
+src_reg
+vec4_visitor::emit_uniformize(const src_reg &src)
  {
-   const char *callee = ir->callee->function_name();
+   const src_reg chan_index(this, glsl_type::uint_type);
+   const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
+                              src.type);
  
-   if (!strcmp("__intrinsic_atomic_read", callee) ||
-       !strcmp("__intrinsic_atomic_increment", callee) ||
-       !strcmp("__intrinsic_atomic_predecrement", callee)) {
-      visit_atomic_counter_intrinsic(ir);
-   } else {
-      unreachable("Unsupported intrinsic.");
-   }
+   emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
+      ->force_writemask_all = true;
+   emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
+      ->force_writemask_all = true;
+
+   return src_reg(dst);
  }
  
  src_reg
@@ -2718,155 +1062,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
                    src_reg(inst->dst), sampler, dest_type);
  }
  
-void
-vec4_visitor::visit(ir_texture *ir)
-{
-   uint32_t sampler =
-      _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
-
-   ir_rvalue *nonconst_sampler_index =
-      _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
-   /* Handle non-constant sampler array indexing */
-   src_reg sampler_reg;
-   if (nonconst_sampler_index) {
-      /* The highest sampler which may be used by this operation is
-       * the last element of the array. Mark it here, because the generator
-       * doesn't have enough information to determine the bound.
-       */
-      uint32_t array_size = ir->sampler->as_dereference_array()
-         ->array->type->array_size();
-
-      uint32_t max_used = sampler + array_size - 1;
-      if (ir->op == ir_tg4 && devinfo->gen < 8) {
-         max_used += prog_data->base.binding_table.gather_texture_start;
-      } else {
-         max_used += prog_data->base.binding_table.texture_start;
-      }
-
-      brw_mark_surface_used(&prog_data->base, max_used);
-
-      /* Emit code to evaluate the actual indexing expression */
-      nonconst_sampler_index->accept(this);
-      src_reg temp(this, glsl_type::uint_type);
-      emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
-      sampler_reg = emit_uniformize(temp);
-   } else {
-      /* Single sampler, or constant array index; the indexing expression
-       * is just an immediate.
-       */
-      sampler_reg = src_reg(sampler);
-   }
-
-   /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
-    * emitting anything other than setting up the constant result.
-    */
-   if (ir->op == ir_tg4) {
-      ir_constant *chan = ir->lod_info.component->as_constant();
-      int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
-      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
-         dst_reg result(this, ir->type);
-         this->result = src_reg(result);
-         emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
-         return;
-      }
-   }
-
-   /* Should be lowered by do_lower_texture_projection */
-   assert(!ir->projector);
-
-   /* Should be lowered */
-   assert(!ir->offset || !ir->offset->type->is_array());
-
-   /* Generate code to compute all the subexpression trees.  This has to be
-    * done before loading any values into MRFs for the sampler message since
-    * generating these values may involve SEND messages that need the MRFs.
-    */
-   src_reg coordinate;
-   int coord_components = 0;
-   if (ir->coordinate) {
-      coord_components = ir->coordinate->type->vector_elements;
-      ir->coordinate->accept(this);
-      coordinate = this->result;
-   }
-
-   src_reg shadow_comparitor;
-   if (ir->shadow_comparitor) {
-      ir->shadow_comparitor->accept(this);
-      shadow_comparitor = this->result;
-   }
-
-   bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
-   src_reg offset_value;
-   if (has_nonconstant_offset) {
-      ir->offset->accept(this);
-      offset_value = src_reg(this->result);
-   }
-
-   src_reg lod, lod2, sample_index, mcs;
-   switch (ir->op) {
-   case ir_tex:
-      lod = src_reg(0.0f);
-      break;
-   case ir_txf:
-   case ir_txl:
-   case ir_txs:
-      ir->lod_info.lod->accept(this);
-      lod = this->result;
-      break;
-   case ir_query_levels:
-      lod = src_reg(0);
-      break;
-   case ir_txf_ms:
-      ir->lod_info.sample_index->accept(this);
-      sample_index = this->result;
-
-      if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
-         mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
-      else
-         mcs = src_reg(0u);
-      break;
-   case ir_txd:
-      ir->lod_info.grad.dPdx->accept(this);
-      lod = this->result;
-
-      ir->lod_info.grad.dPdy->accept(this);
-      lod2 = this->result;
-      break;
-   case ir_txb:
-   case ir_lod:
-   case ir_tg4:
-   case ir_texture_samples:
-      break;
-   }
-
-   uint32_t constant_offset = 0;
-   if (ir->offset != NULL && !has_nonconstant_offset) {
-      constant_offset  =
-         brw_texture_offset(ir->offset->as_constant()->value.i,
-                            ir->offset->type->vector_elements);
-   }
-
-   /* Stuff the channel select bits in the top of the texture offset */
-   if (ir->op == ir_tg4)
-      constant_offset |=
-         gather_channel( ir->lod_info.component->as_constant()->value.i[0],
-                         sampler) << 16;
-
-   glsl_type const *type = ir->sampler->type;
-   bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
-      type->sampler_array;
-
-   this->result = src_reg(this, ir->type);
-   dst_reg dest = dst_reg(this->result);
-
-   emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
-                shadow_comparitor,
-                lod, lod2, sample_index,
-                constant_offset, offset_value,
-                mcs, is_cube_array, sampler, sampler_reg);
-}
-
  /**
   * Apply workarounds for Gen6 gather with UINT/SINT
   */
@@ -2975,78 +1170,18 @@ vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
     }
  }
  
-void
-vec4_visitor::visit(ir_return *)
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_discard *)
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_if *ir)
-{
-   /* Don't point the annotation at the if statement, because then it plus
-    * the then and else blocks get printed.
-    */
-   this->base_ir = ir->condition;
-
-   if (devinfo->gen == 6) {
-      emit_if_gen6(ir);
-   } else {
-      enum brw_predicate predicate;
-      emit_bool_to_cond_code(ir->condition, &predicate);
-      emit(IF(predicate));
-   }
-
-   visit_instructions(&ir->then_instructions);
-
-   if (!ir->else_instructions.is_empty()) {
-      this->base_ir = ir->condition;
-      emit(BRW_OPCODE_ELSE);
-
-      visit_instructions(&ir->else_instructions);
-   }
-
-   this->base_ir = ir->condition;
-   emit(BRW_OPCODE_ENDIF);
-}
-
  void
  vec4_visitor::gs_emit_vertex(int stream_id)
  {
     unreachable("not reached");
  }
  
-void
-vec4_visitor::visit(ir_emit_vertex *)
-{
-   unreachable("not reached");
-}
-
  void
  vec4_visitor::gs_end_primitive()
  {
     unreachable("not reached");
  }
  
-
-void
-vec4_visitor::visit(ir_end_primitive *)
-{
-   unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_barrier *)
-{
-   unreachable("not reached");
-}
-
  void
  vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                                    dst_reg dst, src_reg offset,
@@ -3688,42 +1823,17 @@ vec4_visitor::resolve_ud_negate(src_reg *reg)
     *reg = temp;
  }
  
-/**
- * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
- *
- * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
- * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
- */
-void
-vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
-{
-   assert(devinfo->gen <= 5);
-
-   if (!rvalue->type->is_boolean())
-      return;
-
-   src_reg and_result = src_reg(this, rvalue->type);
-   src_reg neg_result = src_reg(this, rvalue->type);
-   emit(AND(dst_reg(and_result), *reg, src_reg(1)));
-   emit(MOV(dst_reg(neg_result), negate(and_result)));
-   *reg = neg_result;
-}
-
  vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                             void *log_data,
-                           struct gl_program *prog,
                             const struct brw_sampler_prog_key_data *key_tex,
                             struct brw_vue_prog_data *prog_data,
-                          struct gl_shader_program *shader_prog,
-                           gl_shader_stage stage,
+                           nir_shader *shader,
                            void *mem_ctx,
                             bool no_spills,
                             int shader_time_index)
-   : backend_shader(compiler, log_data, mem_ctx,
-                    shader_prog, prog, &prog_data->base, stage),
+   : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
       key_tex(key_tex),
       prog_data(prog_data),
-     sanity_param_count(0),
       fail_msg(NULL),
       first_non_payload_grf(0),
       need_all_constants_in_pull_buffer(false),
@@ -3737,10 +1847,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
     this->current_annotation = NULL;
     memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
  
-   this->variable_ht = hash_table_ctor(0,
-                                      hash_table_pointer_hash,
-                                      hash_table_pointer_compare);
-
     this->virtual_grf_start = NULL;
     this->virtual_grf_end = NULL;
     this->live_intervals = NULL;
@@ -3759,12 +1865,10 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
     }
  
     this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
-   this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
  }
  
  vec4_visitor::~vec4_visitor()
  {
-   hash_table_dtor(this->variable_ht);
  }
  
  
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp

deleted file mode 100644 (file)

index d1a72d7..0000000
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file brw_vec4_vp.cpp
- *
- * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
- * ARB_vertex_program and fixed-function vertex processing.
- */
-
-#include "brw_context.h"
-#include "brw_vec4.h"
-#include "brw_vs.h"
-extern "C" {
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-}
-using namespace brw;
-
-void
-vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
-                          dst_reg dst, src_reg src0, src_reg src1,
-                          src_reg one)
-{
-   vec4_instruction *inst;
-
-   inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
-
-   inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
-   inst->predicate = BRW_PREDICATE_NORMAL;
-}
-
-void
-vec4_vs_visitor::emit_program_code()
-{
-   this->need_all_constants_in_pull_buffer = false;
-
-   setup_vp_regs();
-
-   /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
-    * be:
-    *
-    * sel.f0 dst 1.0 0.0
-    *
-    * instead of
-    *
-    * mov    dst 0.0
-    * mov.f0 dst 1.0
-    */
-   src_reg one = src_reg(this, glsl_type::float_type);
-   emit(MOV(dst_reg(one), src_reg(1.0f)));
-
-   for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
-      const struct prog_instruction *vpi = &prog->Instructions[insn];
-      base_ir = vpi;
-
-      dst_reg dst;
-      src_reg src[3];
-
-      /* We always emit into a temporary destination register to avoid
-       * aliasing issues.
-       */
-      dst = dst_reg(this, glsl_type::vec4_type);
-
-      for (int i = 0; i < 3; i++)
-         src[i] = get_vp_src_reg(vpi->SrcReg[i]);
-
-      switch (vpi->Opcode) {
-      case OPCODE_ABS:
-         src[0].abs = true;
-         src[0].negate = false;
-         emit(MOV(dst, src[0]));
-         break;
-
-      case OPCODE_ADD:
-         emit(ADD(dst, src[0], src[1]));
-         break;
-
-      case OPCODE_ARL:
-         if (devinfo->gen >= 6) {
-            dst.writemask = WRITEMASK_X;
-            dst_reg dst_f = dst;
-            dst_f.type = BRW_REGISTER_TYPE_F;
-
-            emit(RNDD(dst_f, src[0]));
-            emit(MOV(dst, src_reg(dst_f)));
-         } else {
-            emit(RNDD(dst, src[0]));
-         }
-         break;
-
-      case OPCODE_DP3:
-         emit(DP3(dst, src[0], src[1]));
-         break;
-      case OPCODE_DP4:
-         emit(DP4(dst, src[0], src[1]));
-         break;
-      case OPCODE_DPH:
-         emit(DPH(dst, src[0], src[1]));
-         break;
-
-      case OPCODE_DST: {
-         dst_reg t = dst;
-         if (vpi->DstReg.WriteMask & WRITEMASK_X) {
-            t.writemask = WRITEMASK_X;
-            emit(MOV(t, src_reg(1.0f)));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
-            t.writemask = WRITEMASK_Y;
-            emit(MUL(t, src[0], src[1]));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
-            t.writemask = WRITEMASK_Z;
-            emit(MOV(t, src[0]));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
-            t.writemask = WRITEMASK_W;
-            emit(MOV(t, src[1]));
-         }
-         break;
-      }
-
-      case OPCODE_EXP: {
-         dst_reg result = dst;
-         if (vpi->DstReg.WriteMask & WRITEMASK_X) {
-            /* tmp_d = floor(src[0].x) */
-            src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
-            assert(tmp_d.type == BRW_REGISTER_TYPE_D);
-            emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
-
-            /* result[0] = 2.0 ^ tmp */
-            /* Adjust exponent for floating point: exp += 127 */
-            dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
-            emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
-
-            /* Install exponent and sign.  Excess drops off the edge: */
-            dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
-            emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
-            result.writemask = WRITEMASK_Y;
-            emit(FRC(result, src[0]));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
-            result.writemask = WRITEMASK_Z;
-            emit_math(SHADER_OPCODE_EXP2, result, src[0]);
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
-            result.writemask = WRITEMASK_W;
-            emit(MOV(result, src_reg(1.0f)));
-         }
-         break;
-      }
-
-      case OPCODE_EX2:
-         emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
-         break;
-
-      case OPCODE_FLR:
-         emit(RNDD(dst, src[0]));
-         break;
-
-      case OPCODE_FRC:
-         emit(FRC(dst, src[0]));
-         break;
-
-      case OPCODE_LG2:
-         emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
-         break;
-
-      case OPCODE_LIT: {
-         dst_reg result = dst;
-         /* From the ARB_vertex_program spec:
-          *
-          *      tmp = VectorLoad(op0);
-          *      if (tmp.x < 0) tmp.x = 0;
-          *      if (tmp.y < 0) tmp.y = 0;
-          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
-          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
-          *      result.x = 1.0;
-          *      result.y = tmp.x;
-          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
-          *      result.w = 1.0;
-          *
-          * Note that we don't do the clamping to +/- 128.  We didn't in
-          * brw_vs_emit.c either.
-          */
-         if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
-            result.writemask = WRITEMASK_XW;
-            emit(MOV(result, src_reg(1.0f)));
-         }
-         if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
-            result.writemask = WRITEMASK_YZ;
-            emit(MOV(result, src_reg(0.0f)));
-
-            src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
-
-            emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
-            emit(IF(BRW_PREDICATE_NORMAL));
-
-            if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
-               result.writemask = WRITEMASK_Y;
-               emit(MOV(result, tmp_x));
-            }
-
-            if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
-               /* if (tmp.y < 0) tmp.y = 0; */
-               src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
-               result.writemask = WRITEMASK_Z;
-               emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
-
-               src_reg clamped_y(result);
-               clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
-
-               src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
-
-               emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
-            }
-            emit(BRW_OPCODE_ENDIF);
-         }
-         break;
-      }
-
-      case OPCODE_LOG: {
-         dst_reg result = dst;
-         result.type = BRW_REGISTER_TYPE_UD;
-         src_reg result_src = src_reg(result);
-
-         src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
-         arg0_ud.type = BRW_REGISTER_TYPE_UD;
-
-         /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
-          * according to spec:
-          *
-          * These almost look likey they could be joined up, but not really
-          * practical:
-          *
-          * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
-          * result[1].i = (x.i & ((1<<23)-1)        + (127<<23)
-          */
-         if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
-            result.writemask = WRITEMASK_X;
-            emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
-            emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
-            src_reg result_d(result_src);
-            result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
-            result.type = BRW_REGISTER_TYPE_F;
-            emit(ADD(result, result_d, src_reg(-127)));
-         }
-
-         if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
-            result.writemask = WRITEMASK_Y;
-            result.type = BRW_REGISTER_TYPE_UD;
-            emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
-            emit(OR(result, result_src, src_reg(127u << 23)));
-         }
-
-         if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
-            /* result[2] = result[0] + LOG2(result[1]); */
-
-            /* Why bother?  The above is just a hint how to do this with a
-             * taylor series.  Maybe we *should* use a taylor series as by
-             * the time all the above has been done it's almost certainly
-             * quicker than calling the mathbox, even with low precision.
-             *
-             * Options are:
-             *    - result[0] + mathbox.LOG2(result[1])
-             *    - mathbox.LOG2(arg0.x)
-             *    - result[0] + inline_taylor_approx(result[1])
-             */
-            result.type = BRW_REGISTER_TYPE_F;
-            result.writemask = WRITEMASK_Z;
-            src_reg result_x(result), result_y(result), result_z(result);
-            result_x.swizzle = BRW_SWIZZLE_XXXX;
-            result_y.swizzle = BRW_SWIZZLE_YYYY;
-            result_z.swizzle = BRW_SWIZZLE_ZZZZ;
-            emit_math(SHADER_OPCODE_LOG2, result, result_y);
-            emit(ADD(result, result_z, result_x));
-         }
-
-         if (vpi->DstReg.WriteMask & WRITEMASK_W) {
-            result.type = BRW_REGISTER_TYPE_F;
-            result.writemask = WRITEMASK_W;
-            emit(MOV(result, src_reg(1.0f)));
-         }
-         break;
-      }
-
-      case OPCODE_MAD: {
-         src_reg temp = src_reg(this, glsl_type::vec4_type);
-         emit(MUL(dst_reg(temp), src[0], src[1]));
-         emit(ADD(dst, temp, src[2]));
-         break;
-      }
-
-      case OPCODE_MAX:
-         emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_MIN:
-         emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_MOV:
-         emit(MOV(dst, src[0]));
-         break;
-
-      case OPCODE_MUL:
-         emit(MUL(dst, src[0], src[1]));
-         break;
-
-      case OPCODE_POW:
-         emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_RCP:
-         emit_math(SHADER_OPCODE_RCP, dst, src[0]);
-         break;
-
-      case OPCODE_RSQ:
-         emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
-         break;
-
-      case OPCODE_SGE:
-         emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
-         break;
-
-      case OPCODE_SLT:
-         emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
-         break;
-
-      case OPCODE_SUB: {
-         src_reg neg_src1 = src[1];
-         neg_src1.negate = !src[1].negate;
-         emit(ADD(dst, src[0], neg_src1));
-         break;
-      }
-
-      case OPCODE_SWZ:
-         /* Note that SWZ's extended swizzles are handled in the general
-          * get_src_reg() code.
-          */
-         emit(MOV(dst, src[0]));
-         break;
-
-      case OPCODE_XPD: {
-         src_reg t1 = src_reg(this, glsl_type::vec4_type);
-         src_reg t2 = src_reg(this, glsl_type::vec4_type);
-
-         emit(MUL(dst_reg(t1),
-                  swizzle(src[0], BRW_SWIZZLE_YZXW),
-                  swizzle(src[1], BRW_SWIZZLE_ZXYW)));
-         emit(MUL(dst_reg(t2),
-                  swizzle(src[0], BRW_SWIZZLE_ZXYW),
-                  swizzle(src[1], BRW_SWIZZLE_YZXW)));
-         t2.negate = true;
-         emit(ADD(dst, t1, t2));
-         break;
-      }
-
-      case OPCODE_END:
-         break;
-
-      default:
-         assert(!"Unsupported opcode in vertex program");
-      }
-
-      /* Copy the temporary back into the actual destination register. */
-      if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
-         emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
-      }
-   }
-
-   /* If we used relative addressing, we need to upload all constants as
-    * pull constants.  Do that now.
-    */
-   if (this->need_all_constants_in_pull_buffer) {
-      const struct gl_program_parameter_list *params = vp->Base.Parameters;
-      unsigned i;
-      for (i = 0; i < params->NumParameters * 4; i++) {
-         stage_prog_data->pull_param[i] =
-            &params->ParameterValues[i / 4][i % 4];
-      }
-      stage_prog_data->nr_pull_params = i;
-   }
-}
-
-void
-vec4_vs_visitor::setup_vp_regs()
-{
-   /* PROGRAM_TEMPORARY */
-   int num_temp = prog->NumTemporaries;
-   vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
-   for (int i = 0; i < num_temp; i++)
-      vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
-
-   /* PROGRAM_STATE_VAR etc. */
-   struct gl_program_parameter_list *plist = vp->Base.Parameters;
-   for (unsigned p = 0; p < plist->NumParameters; p++) {
-      unsigned components = plist->Parameters[p].Size;
-
-      /* Parameters should be either vec4 uniforms or single component
-       * constants; matrices and other larger types should have been broken
-       * down earlier.
-       */
-      assert(components <= 4);
-
-      this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
-      this->uniform_vector_size[this->uniforms] = components;
-      for (unsigned i = 0; i < 4; i++) {
-         stage_prog_data->param[this->uniforms * 4 + i] = i >= components
-            ? 0 : &plist->ParameterValues[p][i];
-      }
-      this->uniforms++; /* counted in vec4 units */
-   }
-
-   /* PROGRAM_OUTPUT */
-   for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
-      int varying = prog_data->vue_map.slot_to_varying[slot];
-      if (varying == VARYING_SLOT_PSIZ)
-         output_reg[varying] = dst_reg(this, glsl_type::float_type);
-      else
-         output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
-      assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
-   }
-
-   /* PROGRAM_ADDRESS */
-   this->vp_addr_reg = src_reg(this, glsl_type::int_type);
-   assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
-}
-
-dst_reg
-vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
-{
-   dst_reg result;
-
-   assert(!dst.RelAddr);
-
-   switch (dst.File) {
-   case PROGRAM_TEMPORARY:
-      result = dst_reg(vp_temp_regs[dst.Index]);
-      break;
-
-   case PROGRAM_OUTPUT:
-      result = output_reg[dst.Index];
-      break;
-
-   case PROGRAM_ADDRESS: {
-      assert(dst.Index == 0);
-      result = dst_reg(this->vp_addr_reg);
-      break;
-   }
-
-   case PROGRAM_UNDEFINED:
-      return dst_null_f();
-
-   default:
-      unreachable("vec4_vp: bad destination register file");
-   }
-
-   result.writemask = dst.WriteMask;
-   return result;
-}
-
-src_reg
-vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
-{
-   struct gl_program_parameter_list *plist = vp->Base.Parameters;
-
-   src_reg result;
-
-   assert(!src.Abs);
-
-   switch (src.File) {
-   case PROGRAM_UNDEFINED:
-      return src_reg(brw_null_reg());
-
-   case PROGRAM_TEMPORARY:
-      result = vp_temp_regs[src.Index];
-      break;
-
-   case PROGRAM_INPUT:
-      result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
-      result.type = BRW_REGISTER_TYPE_F;
-      break;
-
-   case PROGRAM_ADDRESS: {
-      assert(src.Index == 0);
-      result = this->vp_addr_reg;
-      break;
-   }
-
-   case PROGRAM_STATE_VAR:
-   case PROGRAM_CONSTANT:
-      /* From the ARB_vertex_program specification:
-       * "Relative addressing can only be used for accessing program
-       *  parameter arrays."
-       */
-      if (src.RelAddr) {
-         /* Since we have no idea what the base of the array is, we need to
-          * upload ALL constants as push constants.
-          */
-         this->need_all_constants_in_pull_buffer = true;
-
-         /* Add the small constant index to the address register */
-         src_reg reladdr = src_reg(this, glsl_type::int_type);
-
-         dst_reg dst_reladdr = dst_reg(reladdr);
-         dst_reladdr.writemask = WRITEMASK_X;
-         emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
-
-         if (devinfo->gen < 6)
-            emit(MUL(dst_reladdr, reladdr, src_reg(16)));
-
-      #if 0
-         assert(src.Index < this->uniforms);
-         result = src_reg(dst_reg(UNIFORM, 0));
-         result.type = BRW_REGISTER_TYPE_F;
-         result.reladdr = new(mem_ctx) src_reg();
-         memcpy(result.reladdr, &reladdr, sizeof(src_reg));
-      #endif
-
-         result = src_reg(this, glsl_type::vec4_type);
-         src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
-
-         emit_pull_constant_load_reg(dst_reg(result),
-                                     surf_index,
-                                     reladdr,
-                                     NULL, NULL /* before_block/inst */);
-         break;
-      }
-
-      /* We actually want to look at the type in the Parameters list for this,
-       * because this lets us upload constant builtin uniforms as actual
-       * constants.
-       */
-      switch (plist->Parameters[src.Index].Type) {
-      case PROGRAM_CONSTANT:
-         result = src_reg(this, glsl_type::vec4_type);
-         for (int i = 0; i < 4; i++) {
-            dst_reg t = dst_reg(result);
-            t.writemask = 1 << i;
-            emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
-         }
-         break;
-
-      case PROGRAM_STATE_VAR:
-         assert(src.Index < this->uniforms);
-         result = src_reg(dst_reg(UNIFORM, src.Index));
-         result.type = BRW_REGISTER_TYPE_F;
-         break;
-
-      default:
-         assert(!"Bad uniform in src register file");
-         return src_reg(this, glsl_type::vec4_type);
-      }
-      break;
-
-   default:
-      assert(!"Bad src register file");
-      return src_reg(this, glsl_type::vec4_type);
-   }
-
-   if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
-      unsigned short zeros_mask = 0;
-      unsigned short ones_mask = 0;
-      unsigned short src_mask = 0;
-      unsigned short src_swiz[4];
-
-      for (int i = 0; i < 4; i++) {
-         src_swiz[i] = 0; /* initialize for safety */
-
-         /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
-          * but it's simplest to handle it here.
-          */
-         int s = GET_SWZ(src.Swizzle, i);
-         switch (s) {
-         case SWIZZLE_X:
-         case SWIZZLE_Y:
-         case SWIZZLE_Z:
-         case SWIZZLE_W:
-            src_mask |= 1 << i;
-            src_swiz[i] = s;
-            break;
-         case SWIZZLE_ZERO:
-            zeros_mask |= 1 << i;
-            break;
-         case SWIZZLE_ONE:
-            ones_mask |= 1 << i;
-            break;
-         }
-      }
-
-      result.swizzle =
-         BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
-
-      /* The hardware doesn't natively handle the SWZ instruction's zero/one
-       * swizzles or per-component negation, so we need to use a temporary.
-       */
-      if (zeros_mask || ones_mask || src.Negate) {
-         src_reg temp_src(this, glsl_type::vec4_type);
-         dst_reg temp(temp_src);
-
-         if (src_mask) {
-            temp.writemask = src_mask;
-            emit(MOV(temp, result));
-         }
-
-         if (zeros_mask) {
-            temp.writemask = zeros_mask;
-            emit(MOV(temp, src_reg(0.0f)));
-         }
-
-         if (ones_mask) {
-            temp.writemask = ones_mask;
-            emit(MOV(temp, src_reg(1.0f)));
-         }
-
-         if (src.Negate) {
-            temp.writemask = src.Negate;
-            src_reg neg(temp_src);
-            neg.negate = true;
-            emit(MOV(temp, neg));
-         }
-         result = temp_src;
-      }
-   }
-
-   return result;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp

index f4b50ba9d9fd74a4ddce6252752658a14937263f..b6e1971c2ee1a613ce1c827fe7a1af0694fed5d1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -262,7 +262,6 @@ vec4_vs_visitor::setup_uniform_clipplane_values()
  {
     for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
        assert(this->uniforms < uniform_array_size);
-      this->uniform_vector_size[this->uniforms] = 4;
        this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
        this->userplane[i].type = BRW_REGISTER_TYPE_F;
        for (int j = 0; j < 4; ++j) {
@@ -302,20 +301,15 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
                                   void *log_data,
                                   const struct brw_vs_prog_key *key,
                                   struct brw_vs_prog_data *vs_prog_data,
-                                 struct gl_vertex_program *vp,
-                                 struct gl_shader_program *prog,
+                                 nir_shader *shader,
                                   gl_clip_plane *clip_planes,
                                   void *mem_ctx,
                                   int shader_time_index,
                                   bool use_legacy_snorm_formula)
-   : vec4_visitor(compiler, log_data,
-                  &vp->Base, &key->tex, &vs_prog_data->base, prog,
-                  MESA_SHADER_VERTEX,
-                  mem_ctx, false /* no_spills */,
-                  shader_time_index),
+   : vec4_visitor(compiler, log_data, &key->tex, &vs_prog_data->base, shader,
+                  mem_ctx, false /* no_spills */, shader_time_index),
       key(key),
       vs_prog_data(vs_prog_data),
-     vp(vp),
       clip_planes(clip_planes),
       use_legacy_snorm_formula(use_legacy_snorm_formula)
  {
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c

index 0c60bde511e7d6097b0a6b68bd86cc20705c5770..3c6ee0a7a03ba6e4ff611ac996bfb84ac42a5bb7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -37,6 +37,7 @@
  #include "brw_state.h"
  #include "program/prog_print.h"
  #include "program/prog_parameter.h"
+#include "brw_nir.h"
  
  #include "util/ralloc.h"
  
@@ -65,27 +66,6 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
     }
  }
  
-
-bool
-brw_vs_prog_data_compare(const void *in_a, const void *in_b)
-{
-   const struct brw_vs_prog_data *a = in_a;
-   const struct brw_vs_prog_data *b = in_b;
-
-   /* Compare the base structure. */
-   if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
-      return false;
-
-   /* Compare the rest of the struct. */
-   const unsigned offset = sizeof(struct brw_stage_prog_data);
-   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
-              sizeof(struct brw_vs_prog_data) - offset)) {
-      return false;
-   }
-
-   return true;
-}
-
  bool
  brw_codegen_vs_prog(struct brw_context *brw,
                      struct gl_shader_program *prog,
@@ -102,6 +82,18 @@ brw_codegen_vs_prog(struct brw_context *brw,
     bool start_busy = false;
     double start_time = 0;
  
+   if (!vp->program.Base.nir) {
+      /* Normally we generate NIR in LinkShader() or
+       * ProgramStringNotify(), but Mesa's fixed-function vertex program
+       * handling doesn't notify the driver at all.  Just do it here, at
+       * the last minute, even though it's lame.
+       */
+      assert(vp->program.Base.Id == 0 && prog == NULL);
+      vp->program.Base.nir =
+         brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
+                        brw->intelScreen->compiler->scalar_vs);
+   }
+
     if (prog)
        vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
  
@@ -113,22 +105,22 @@ brw_codegen_vs_prog(struct brw_context *brw,
  
     mem_ctx = ralloc_context(NULL);
  
+   brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
+                                           brw->intelScreen->devinfo,
+                                           prog, &vp->program.Base,
+                                           &prog_data.base.base, 0);
+
     /* Allocate the references to the uniforms that will end up in the
      * prog_data associated with the compiled program, and which will be freed
      * by the state cache.
      */
-   int param_count;
-   if (vs) {
-      /* We add padding around uniform values below vec4 size, with the worst
-       * case being a float value that gets blown up to a vec4, so be
-       * conservative here.
-       */
-      param_count = vs->base.num_uniform_components * 4 +
-                    vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
-      stage_prog_data->nr_image_params = vs->base.NumImages;
-   } else {
-      param_count = vp->program.Base.Parameters->NumParameters * 4;
-   }
+   int param_count = vp->program.Base.nir->num_uniforms;
+   if (!brw->intelScreen->compiler->scalar_vs)
+      param_count *= 4;
+
+   if (vs)
+      prog_data.base.base.nr_image_params = vs->base.NumImages;
+
     /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
      * planes as uniforms.
      */
@@ -143,6 +135,15 @@ brw_codegen_vs_prog(struct brw_context *brw,
                      stage_prog_data->nr_image_params);
     stage_prog_data->nr_params = param_count;
  
+   if (prog) {
+      brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
+                                  &prog_data.base.base,
+                                  brw->intelScreen->compiler->scalar_vs);
+   } else {
+      brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
+                                 &prog_data.base.base);
+   }
+
     GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
     prog_data.inputs_read = vp->program.Base.InputsRead;
  
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h

index 3a847fcd28a42e7f54f709b456e3b1859723900c..96d2435a51568fe46f9cc9e2ff5ac64e0075f64e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -64,7 +64,6 @@ const unsigned *brw_vs_emit(struct brw_context *brw,
  void brw_vs_debug_recompile(struct brw_context *brw,
                              struct gl_shader_program *prog,
                              const struct brw_vs_prog_key *key);
-bool brw_vs_prog_data_compare(const void *a, const void *b);
  
  void
  brw_upload_vs_prog(struct brw_context *brw);
@@ -88,8 +87,7 @@ public:
                     void *log_data,
                     const struct brw_vs_prog_key *key,
                     struct brw_vs_prog_data *vs_prog_data,
-                   struct gl_vertex_program *vp,
-                   struct gl_shader_program *prog,
+                   nir_shader *shader,
                     gl_clip_plane *clip_planes,
                     void *mem_ctx,
                     int shader_time_index,
@@ -100,7 +98,6 @@ protected:
                                                const glsl_type *type);
     virtual void setup_payload();
     virtual void emit_prolog();
-   virtual void emit_program_code();
     virtual void emit_thread_end();
     virtual void emit_urb_write_header(int mrf);
     virtual void emit_urb_slot(dst_reg reg, int varying);
@@ -116,7 +113,6 @@ private:
  
     const struct brw_vs_prog_key *const key;
     struct brw_vs_prog_data * const vs_prog_data;
-   struct gl_vertex_program *const vp;
     src_reg *vp_temp_regs;
     src_reg vp_addr_reg;
  
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c

index 1faf2eaa346cd9e1ae32d1098bba97260eaa8481..087bf5a53e1589a5eb47b198966235f414b97e53 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -35,6 +35,7 @@
  #include "program/prog_parameter.h"
  #include "program/program.h"
  #include "intel_mipmap_tree.h"
+#include "brw_nir.h"
  
  #include "util/ralloc.h"
  
@@ -131,23 +132,24 @@ computed_depth_mode(struct gl_fragment_program *fp)
     return BRW_PSCDEPTH_OFF;
  }
  
-bool
-brw_wm_prog_data_compare(const void *in_a, const void *in_b)
+static void
+assign_fs_binding_table_offsets(const struct brw_device_info *devinfo,
+                                const struct gl_shader_program *shader_prog,
+                                const struct gl_program *prog,
+                                const struct brw_wm_prog_key *key,
+                                struct brw_wm_prog_data *prog_data)
  {
-   const struct brw_wm_prog_data *a = in_a;
-   const struct brw_wm_prog_data *b = in_b;
-
-   /* Compare the base structure. */
-   if (!brw_stage_prog_data_compare(&a->base, &b->base))
-      return false;
+   uint32_t next_binding_table_offset = 0;
  
-   /* Compare the rest of the structure. */
-   const unsigned offset = sizeof(struct brw_stage_prog_data);
-   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
-              sizeof(struct brw_wm_prog_data) - offset))
-      return false;
+   /* If there are no color regions, we still perform an FB write to a null
+    * renderbuffer, which we place at surface index 0.
+    */
+   prog_data->binding_table.render_target_start = next_binding_table_offset;
+   next_binding_table_offset += MAX2(key->nr_color_regions, 1);
  
-   return true;
+   brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo,
+                                           shader_prog, prog, &prog_data->base,
+                                           next_binding_table_offset);
  }
  
  /**
@@ -188,18 +190,16 @@ brw_codegen_wm_prog(struct brw_context *brw,
     if (!prog)
        prog_data.base.use_alt_mode = true;
  
+   assign_fs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+                                   &fp->program.Base, key, &prog_data);
+
     /* Allocate the references to the uniforms that will end up in the
      * prog_data associated with the compiled program, and which will be freed
      * by the state cache.
      */
-   int param_count;
-   if (fs) {
-      param_count = fs->base.num_uniform_components +
-                    fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+   int param_count = fp->program.Base.nir->num_uniforms;
+   if (fs)
        prog_data.base.nr_image_params = fs->base.NumImages;
-   } else {
-      param_count = fp->program.Base.Parameters->NumParameters * 4;
-   }
     /* The backend also sometimes adds params for texture size. */
     param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
     prog_data.base.param =
@@ -211,6 +211,14 @@ brw_codegen_wm_prog(struct brw_context *brw,
                      prog_data.base.nr_image_params);
     prog_data.base.nr_params = param_count;
  
+   if (prog) {
+      brw_nir_setup_glsl_uniforms(fp->program.Base.nir, prog, &fp->program.Base,
+                                  &prog_data.base, true);
+   } else {
+      brw_nir_setup_arb_uniforms(fp->program.Base.nir, &fp->program.Base,
+                                 &prog_data.base);
+   }
+
     prog_data.barycentric_interp_modes =
        brw_compute_barycentric_interp_modes(brw, key->flat_shade,
                                             key->persample_shading,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h

index 73a741f89e4a3cafdff86156cbd06d026bd96e42..667edf2eddf7a9a20724f362ed9afe71b2d32d1a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -85,7 +85,6 @@ bool brw_codegen_wm_prog(struct brw_context *brw,
  void brw_wm_debug_recompile(struct brw_context *brw,
                              struct gl_shader_program *prog,
                              const struct brw_wm_prog_key *key);
-bool brw_wm_prog_data_compare(const void *a, const void *b);
  
  void
  brw_upload_wm_prog(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.cpp b/src/mesa/drivers/dri/i965/brw_wm_iz.cpp

index 14930eb01842761b3c0469c7377713fa05e78c90..6f22f29447638495d8cfa97cd86238348c7d91a7 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.cpp
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.cpp
@@ -124,12 +124,11 @@ void fs_visitor::setup_payload_gen4()
  {
     assert(stage == MESA_SHADER_FRAGMENT);
     brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   gl_fragment_program *fp = (gl_fragment_program*) prog;
     GLuint reg = 2;
     bool kill_stats_promoted_workaround = false;
     int lookup = key->iz_lookup;
     bool uses_depth =
-      (fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+      (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
  
     assert(lookup < IZ_BIT_MAX);
  
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c

index c9316963840f9675f097becda12dba3f77d6b8ad..c671e23827ef5cde6065d9c03756d7a6c3434725 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -380,7 +380,7 @@ brw_update_texture_surface(struct gl_context *ctx,
     surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
                SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
  
-   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
+   surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
  
     /* Emit relocation to surface contents */
     drm_intel_bo_emit_reloc(brw->batch.bo,
@@ -718,7 +718,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
     assert(tile_y % 2 == 0);
     surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
-             (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
+             (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
  
     if (brw->gen < 6) {
        /* _NEW_COLOR */
@@ -1001,6 +1001,32 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = {
     .emit = brw_upload_wm_ubo_surfaces,
  };
  
+static void
+brw_upload_cs_ubo_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (!prog)
+      return;
+
+   /* BRW_NEW_CS_PROG_DATA */
+   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                           &brw->cs.base, &brw->cs.prog_data->base, true);
+}
+
+const struct brw_tracked_state brw_cs_ubo_surfaces = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_CS_PROG_DATA |
+             BRW_NEW_UNIFORM_BUFFER,
+   },
+   .emit = brw_upload_cs_ubo_surfaces,
+};
+
  void
  brw_upload_abo_surfaces(struct brw_context *brw,
                         struct gl_shader_program *prog,
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp

index cba5c2f456aed8229de122fe580d5742e2655f43..74f3a70a64e4ae9cb835f31ffb9f63adec7cb04e 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -413,7 +413,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
     assert(tile_y % 2 == 0);
     surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
                (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
-              (surface->mt->align_h == 4 ?
+              (surface->mt->valign == 4 ?
                 BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
  
     /* Emit relocation to surface contents */
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp

index 4c9c96028f8fd517f620e9defe5ba6d197b6395c..def21d80b2453ae90d03d2ef09030515555d7afb 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -35,15 +35,6 @@ const unsigned MAX_GS_INPUT_VERTICES = 6;
  
  namespace brw {
  
-void
-gen6_gs_visitor::assign_binding_table_offsets()
-{
-   /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
-    * feedback surfaces.
-    */
-   assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS);
-}
-
  void
  gen6_gs_visitor::emit_prolog()
  {
@@ -146,27 +137,6 @@ gen6_gs_visitor::emit_prolog()
     }
  }
  
-void
-gen6_gs_visitor::visit(ir_emit_vertex *ir)
-{
-   /* To ensure that we don't output more vertices than the shader specified
-    * using max_vertices, do the logic inside a conditional of the form "if
-    * (vertex_count < MAX)"
-    */
-   unsigned num_output_vertices = c->gp->program.VerticesOut;
-   emit(CMP(dst_null_d(), this->vertex_count,
-            src_reg(num_output_vertices), BRW_CONDITIONAL_L));
-   emit(IF(BRW_PREDICATE_NORMAL));
-
-   gs_emit_vertex(ir->stream_id());
-
-   this->current_annotation = "emit vertex: increment vertex count";
-   emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
-            src_reg(1u)));
-
-   emit(BRW_OPCODE_ENDIF);
-}
-
  void
  gen6_gs_visitor::gs_emit_vertex(int stream_id)
  {
@@ -230,12 +200,6 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
              this->vertex_output_offset, 1u));
  }
  
-void
-gen6_gs_visitor::visit(ir_end_primitive *)
-{
-   gs_end_primitive();
-}
-
  void
  gen6_gs_visitor::gs_end_primitive()
  {
@@ -356,9 +320,7 @@ gen6_gs_visitor::emit_thread_end()
     if (c->gp->program.OutputType != GL_POINTS) {
        emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
        emit(IF(BRW_PREDICATE_NORMAL));
-      {
-         visit((ir_end_primitive *) NULL);
-      }
+      gs_end_primitive();
        emit(BRW_OPCODE_ENDIF);
     }
  
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h

index 4cf948932614a606a1b94b9183f10b60e4904878..41c6d183acd74324cb716e50d5c9b72ef370690b 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -39,18 +39,16 @@ public:
                     void *log_data,
                     struct brw_gs_compile *c,
                     struct gl_shader_program *prog,
+                   nir_shader *shader,
                     void *mem_ctx,
                     bool no_spills,
                     int shader_time_index) :
-      vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills,
+      vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills,
                        shader_time_index) {}
  
  protected:
-   virtual void assign_binding_table_offsets();
     virtual void emit_prolog();
     virtual void emit_thread_end();
-   virtual void visit(ir_emit_vertex *);
-   virtual void visit(ir_end_primitive *);
     virtual void gs_emit_vertex(int stream_id);
     virtual void gs_end_primitive();
     virtual void emit_urb_write_header(int mrf);
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c

index 39de62f2304b12d94176d32a93caedd6a6e74884..d892c932af4f187b5412a57b9b7adb0b3d2ff346 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -126,7 +126,7 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
               SET_FIELD(min_array_element, BRW_SURFACE_MIN_ARRAY_ELEMENT) |
               SET_FIELD(depth - 1, BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT);
  
-   surf[5] = (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
+   surf[5] = (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
  
     drm_intel_bo_emit_reloc(brw->batch.bo,
                             offset + 4,
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp

index 9822dc1fe7980ddd44a375589fe35d6d9c6b7a8b..f90e78e43c739a1c68a9da64c1e97b292759b1f2 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -158,9 +158,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
               surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
               gen7_surface_tiling_mode(tiling);
  
-   if (surface->mt->align_h == 4)
+   if (surface->mt->valign == 4)
        surf[0] |= GEN7_SURFACE_VALIGN_4;
-   if (surface->mt->align_w == 8)
+   if (surface->mt->halign == 8)
        surf[0] |= GEN7_SURFACE_HALIGN_8;
  
     if (surface->array_layout == ALL_SLICES_AT_EACH_LOD)
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c

index 0b88b2c0e71cec521e2f75f49ff9d72a28fcce03..5edc4fc98423b08fae80cad6f34f9c7d021a81f1 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -72,7 +72,7 @@ brw_upload_cs_state(struct brw_context *brw)
  
     if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
        local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+         brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
     }
  
     unsigned push_constant_data_size =
@@ -216,8 +216,7 @@ const struct brw_tracked_state brw_cs_state = {
   *
   */
  unsigned
-brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
-                                    unsigned dispatch_width)
+brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
  {
     return 3 * dispatch_width;
  }
@@ -272,7 +271,7 @@ brw_upload_cs_push_constants(struct brw_context *brw,
  
     if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
        local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+         brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
     }
  
     /* Updates the ParamaterValues[i] pointers for all parameters of the
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c

index 15ab2b0eae3a1de6d273ef1ad29663c2e73ffafc..5080f1c3fe461dbf93fc19e1568a353b50427127 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -288,9 +288,9 @@ gen7_emit_texture_surface_state(struct brw_context *brw,
     if (target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY)
        surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
  
-   if (mt->align_h == 4)
+   if (mt->valign == 4)
        surf[0] |= GEN7_SURFACE_VALIGN_4;
-   if (mt->align_w == 8)
+   if (mt->halign == 8)
        surf[0] |= GEN7_SURFACE_HALIGN_8;
  
     if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
@@ -509,9 +509,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
                   GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL) |
               gen7_surface_tiling_mode(mt->tiling);
  
-   if (irb->mt->align_h == 4)
+   if (irb->mt->valign == 4)
        surf[0] |= GEN7_SURFACE_VALIGN_4;
-   if (irb->mt->align_w == 8)
+   if (irb->mt->halign == 8)
        surf[0] |= GEN7_SURFACE_HALIGN_8;
  
     if (is_array) {
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c

index c5f1bae8ecb2046d7073c3ee6cfd48315397d040..e1e7704655d84a280bc80d9cb6fa774488c3dfe4 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -95,7 +95,7 @@ vertical_alignment(const struct brw_context *brw,
          surf_type == BRW_SURFACE_1D))
        return GEN8_SURFACE_VALIGN_4;
  
-   switch (mt->align_h) {
+   switch (mt->valign) {
     case 4:
        return GEN8_SURFACE_VALIGN_4;
     case 8:
@@ -120,7 +120,7 @@ horizontal_alignment(const struct brw_context *brw,
          gen9_use_linear_1d_layout(brw, mt)))
        return GEN8_SURFACE_HALIGN_4;
  
-   switch (mt->align_w) {
+   switch (mt->halign) {
     case 4:
        return GEN8_SURFACE_HALIGN_4;
     case 8:
@@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
         * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
         *  16 must be used."
         */
-      assert(brw->gen < 9 || mt->align_w == 16);
-      assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
+      assert(brw->gen < 9 || mt->halign == 16);
+      assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
     }
  
     const uint32_t surf_type = translate_tex_target(target);
@@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
         * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
         *  16 must be used."
         */
-      assert(brw->gen < 9 || mt->align_w == 16);
-      assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
+      assert(brw->gen < 9 || mt->halign == 16);
+      assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
     }
  
     uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c

index d57651cef5f5a72548ff53bb4cc17b7a5a494201..0a3337ee2856c6ba43b2a427d5050ace63b1bfba 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -235,7 +235,7 @@ intel_copy_image_sub_data(struct gl_context *ctx,
     } else {
        assert(dst_renderbuffer);
        dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
-      src_image = src_renderbuffer->TexImage;
+      dst_image = dst_renderbuffer->TexImage;
     }
  
     if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index 9c0304c76847354d0a9c38634846730bc074137e..ffc356c9240ea55df9e962f6c1ed7806762f76cb 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -694,7 +694,7 @@ intel_miptree_create(struct brw_context *brw,
     if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
         intel_miptree_is_fast_clear_capable(brw, mt)) {
        mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
-      assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1);
+      assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
     }
  
     return mt;
@@ -2168,16 +2168,18 @@ intel_miptree_map_blit(struct brw_context *brw,
                        struct intel_miptree_map *map,
                        unsigned int level, unsigned int slice)
  {
-   map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
-                                  0, 0,
-                                  map->w, map->h, 1,
-                                  0, MIPTREE_LAYOUT_TILING_NONE);
+   map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
+                                         /* first_level */ 0,
+                                         /* last_level */ 0,
+                                         map->w, map->h, 1,
+                                         /* samples */ 0,
+                                         MIPTREE_LAYOUT_TILING_NONE);
  
-   if (!map->mt) {
+   if (!map->linear_mt) {
        fprintf(stderr, "Failed to allocate blit temporary\n");
        goto fail;
     }
-   map->stride = map->mt->pitch;
+   map->stride = map->linear_mt->pitch;
  
     /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
      * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
@@ -2188,7 +2190,7 @@ intel_miptree_map_blit(struct brw_context *brw,
        if (!intel_miptree_blit(brw,
                                mt, level, slice,
                                map->x, map->y, false,
-                              map->mt, 0, 0,
+                              map->linear_mt, 0, 0,
                                0, 0, false,
                                map->w, map->h, GL_COPY)) {
           fprintf(stderr, "Failed to blit\n");
@@ -2196,7 +2198,7 @@ intel_miptree_map_blit(struct brw_context *brw,
        }
     }
  
-   map->ptr = intel_miptree_map_raw(brw, map->mt);
+   map->ptr = intel_miptree_map_raw(brw, map->linear_mt);
  
     DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
         map->x, map->y, map->w, map->h,
@@ -2206,7 +2208,7 @@ intel_miptree_map_blit(struct brw_context *brw,
     return;
  
  fail:
-   intel_miptree_release(&map->mt);
+   intel_miptree_release(&map->linear_mt);
     map->ptr = NULL;
     map->stride = 0;
  }
@@ -2220,11 +2222,11 @@ intel_miptree_unmap_blit(struct brw_context *brw,
  {
     struct gl_context *ctx = &brw->ctx;
  
-   intel_miptree_unmap_raw(map->mt);
+   intel_miptree_unmap_raw(map->linear_mt);
  
     if (map->mode & GL_MAP_WRITE_BIT) {
        bool ok = intel_miptree_blit(brw,
-                                   map->mt, 0, 0,
+                                   map->linear_mt, 0, 0,
                                     0, 0, false,
                                     mt, level, slice,
                                     map->x, map->y, false,
@@ -2232,7 +2234,7 @@ intel_miptree_unmap_blit(struct brw_context *brw,
        WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
     }
  
-   intel_miptree_release(&map->mt);
+   intel_miptree_release(&map->linear_mt);
  }
  
  /**
@@ -2756,7 +2758,7 @@ intel_miptree_unmap(struct brw_context *brw,
        intel_miptree_unmap_etc(brw, mt, map, level, slice);
     } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
        intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
-   } else if (map->mt) {
+   } else if (map->linear_mt) {
        intel_miptree_unmap_blit(brw, mt, map, level, slice);
  #if defined(USE_SSE41)
     } else if (map->buffer && cpu_has_sse4_1) {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h

index 7610d754451e1fabb5243f163812ec2e1b4cb733..486e5c6f43b7b3accde8e753a0aa56b5a7254e3f 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -62,9 +62,11 @@ struct intel_resolve_map;
  struct intel_texture_image;
  
  /**
+ * This bit extends the set of GL_MAP_*_BIT enums.
+ *
   * When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a
   * depthstencil-split-to-separate-stencil miptree, we'll normally make a
- * tmeporary and recreate the kind of data requested by Mesa core, since we're
+ * temporary and recreate the kind of data requested by Mesa core, since we're
   * satisfying some glGetTexImage() request or something.
   *
   * However, occasionally you want to actually map the miptree's current data
@@ -73,14 +75,14 @@ struct intel_texture_image;
  #define BRW_MAP_DIRECT_BIT     0x80000000
  
  struct intel_miptree_map {
-   /** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */
+   /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
     GLbitfield mode;
     /** Region of interest for the map. */
     int x, y, w, h;
     /** Possibly malloced temporary buffer for the mapping. */
     void *buffer;
     /** Possible pointer to a temporary linear miptree for the mapping. */
-   struct intel_mipmap_tree *mt;
+   struct intel_mipmap_tree *linear_mt;
     /** Pointer to the start of (map_x, map_y) returned by the mapping. */
     void *ptr;
     /** Stride of the mapping. */
@@ -140,6 +142,9 @@ struct intel_mipmap_level
         * \code
         *     x = mt->level[l].slice[s].x_offset
         *     y = mt->level[l].slice[s].y_offset
+       *
+       * On some hardware generations, we program these offsets into
+       * RENDER_SURFACE_STATE.XOffset and RENDER_SURFACE_STATE.YOffset.
         */
        GLuint x_offset;
        GLuint y_offset;
@@ -168,12 +173,16 @@ enum intel_msaa_layout
      * accommodated by scaling up the width and the height of the surface so
      * that all the samples corresponding to a pixel are located at nearby
      * memory locations.
+    *
+    * @see PRM section "Interleaved Multisampled Surfaces"
      */
     INTEL_MSAA_LAYOUT_IMS,
  
     /**
      * Uncompressed Multisample Surface.  The surface is stored as a 2D array,
      * with array slice n containing all pixel data for sample n.
+    *
+    * @see PRM section "Uncompressed Multisampled Surfaces"
      */
     INTEL_MSAA_LAYOUT_UMS,
  
@@ -185,6 +194,8 @@ enum intel_msaa_layout
      * the common case (where all samples constituting a pixel have the same
      * color value) to be stored efficiently by just using a single array
      * slice.
+    *
+    * @see PRM section "Compressed Multisampled Surfaces"
      */
     INTEL_MSAA_LAYOUT_CMS,
  };
@@ -318,14 +329,34 @@ enum miptree_array_layout {
   */
  struct intel_miptree_aux_buffer
  {
-   /** Buffer object containing the pixel data. */
+   /**
+    * Buffer object containing the pixel data.
+    *
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
+    */
     drm_intel_bo *bo;
  
-   uint32_t pitch; /**< pitch in bytes. */
+   /**
+    * Pitch in bytes.
+    *
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
+    */
+   uint32_t pitch;
  
-   uint32_t qpitch; /**< The distance in rows between array slices. */
+   /**
+    * The distance in rows between array slices.
+    *
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceQPitch
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
+    */
+   uint32_t qpitch;
  
-   struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */
+   /**
+    * Hiz miptree. Used only by Gen6.
+    */
+   struct intel_mipmap_tree *mt;
  };
  
  /* Tile resource modes */
@@ -337,15 +368,49 @@ enum intel_miptree_tr_mode {
  
  struct intel_mipmap_tree
  {
-   /** Buffer object containing the pixel data. */
+   /**
+    * Buffer object containing the surface.
+    *
+    * @see intel_mipmap_tree::offset
+    * @see RENDER_SURFACE_STATE.SurfaceBaseAddress
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+    * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
+    * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
+    */
     drm_intel_bo *bo;
  
-   uint32_t pitch; /**< pitch in bytes. */
+   /**
+    * Pitch in bytes.
+    *
+    * @see RENDER_SURFACE_STATE.SurfacePitch
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
+    * @see 3DSTATE_DEPTH_BUFFER.SurfacePitch
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
+    * @see 3DSTATE_STENCIL_BUFFER.SurfacePitch
+    */
+   uint32_t pitch;
+
+   /**
+    * One of the I915_TILING_* flags.
+    *
+    * @see RENDER_SURFACE_STATE.TileMode
+    * @see 3DSTATE_DEPTH_BUFFER.TileMode
+    */
+   uint32_t tiling;
  
-   uint32_t tiling; /**< One of the I915_TILING_* flags */
+   /**
+    * @see RENDER_SURFACE_STATE.TiledResourceMode
+    * @see 3DSTATE_DEPTH_BUFFER.TiledResourceMode
+    */
     enum intel_miptree_tr_mode tr_mode;
  
-   /* Effectively the key:
+   /**
+    * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
+    *
+    * @see RENDER_SURFACE_STATE.SurfaceType
+    * @see RENDER_SURFACE_STATE.SurfaceArray
+    * @see 3DSTATE_DEPTH_BUFFER.SurfaceType
      */
     GLenum target;
  
@@ -362,18 +427,43 @@ struct intel_mipmap_tree
      *
      * For ETC1/ETC2 textures, this is one of the uncompressed mesa texture
      * formats if the hardware lacks support for ETC1/ETC2. See @ref etc_format.
+    *
+    * @see RENDER_SURFACE_STATE.SurfaceFormat
+    * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
      */
     mesa_format format;
  
-   /** This variable stores the value of ETC compressed texture format */
+   /**
+    * This variable stores the value of ETC compressed texture format
+    *
+    * @see RENDER_SURFACE_STATE.SurfaceFormat
+    */
     mesa_format etc_format;
  
     /**
-    * The X offset of each image in the miptree must be aligned to this.
-    * See the comments in brw_tex_layout.c.
+    * @name Surface Alignment
+    * @{
+    *
+    * This defines the alignment of the upperleft pixel of each "slice" in the
+    * surface. The alignment is in pixel coordinates relative to the surface's
+    * most upperleft pixel, which is the pixel at (x=0, y=0, layer=0,
+    * level=0).
+    *
+    * The hardware docs do not use the term "slice".  We use "slice" to mean
+    * the pixels at a given miplevel and layer. For 2D surfaces, the layer is
+    * the array slice; for 3D surfaces, the layer is the z offset.
+    *
+    * In the surface layout equations found in the hardware docs, the
+    * horizontal and vertical surface alignments often appear as variables 'i'
+    * and 'j'.
      */
-   unsigned int align_w;
-   unsigned int align_h; /**< \see align_w */
+
+   /** @see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */
+   uint32_t halign;
+
+   /** @see RENDER_SURFACE_STATE.SurfaceVerticalAlignment */
+   uint32_t valign;
+   /** @} */
  
     GLuint first_level;
     GLuint last_level;
@@ -388,19 +478,47 @@ struct intel_mipmap_tree
      */
     GLuint physical_width0, physical_height0, physical_depth0;
  
-   GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */
+   /** Bytes per pixel (or bytes per block if compressed) */
+   GLuint cpp;
+
+   /**
+    * @see RENDER_SURFACE_STATE.NumberOfMultisamples
+    * @see 3DSTATE_MULTISAMPLE.NumberOfMultisamples
+    */
     GLuint num_samples;
+
     bool compressed;
  
     /**
-    * Level zero image dimensions.  These dimensions correspond to the
+    * @name Level zero image dimensions
+    * @{
+    *
+    * These dimensions correspond to the
      * logical width, height, and depth of the texture as seen by client code.
      * Accordingly, they do not account for the extra width, height, and/or
      * depth that must be allocated in order to accommodate multisample
      * formats, nor do they account for the extra factor of 6 in depth that
      * must be allocated in order to accommodate cubemap textures.
      */
-   uint32_t logical_width0, logical_height0, logical_depth0;
+
+   /**
+    * @see RENDER_SURFACE_STATE.Width
+    * @see 3DSTATE_DEPTH_BUFFER.Width
+    */
+   uint32_t logical_width0;
+
+   /**
+    * @see RENDER_SURFACE_STATE.Height
+    * @see 3DSTATE_DEPTH_BUFFER.Height
+    */
+   uint32_t logical_height0;
+
+   /**
+    * @see RENDER_SURFACE_STATE.Depth
+    * @see 3DSTATE_DEPTH_BUFFER.Depth
+    */
+   uint32_t logical_depth0;
+   /** @} */
  
     /**
      * Indicates if we use the standard miptree layout (ALL_LOD_IN_EACH_SLICE),
@@ -417,11 +535,18 @@ struct intel_mipmap_tree
      * surfaces it is the number of blocks. For 1D array surfaces that have the
      * mipmap tree stored horizontally it is the number of pixels between each
      * slice.
+    *
+    * @see RENDER_SURFACE_STATE.SurfaceQPitch
+    * @see 3DSTATE_DEPTH_BUFFER.SurfaceQPitch
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
+    * @see 3DSTATE_STENCIL_BUFFER.SurfaceQPitch
      */
     uint32_t qpitch;
  
     /**
      * MSAA layout used by this buffer.
+    *
+    * @see RENDER_SURFACE_STATE.MultisampledSurfaceStorageFormat
      */
     enum intel_msaa_layout msaa_layout;
  
@@ -430,24 +555,34 @@ struct intel_mipmap_tree
     GLuint total_width;
     GLuint total_height;
  
-   /* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to
-    * this depth mipmap tree, if any.
+   /**
+    * The depth value used during the most recent fast depth clear performed
+    * on the surface. This field is invalid only if surface has never
+    * underwent a fast depth clear.
+    *
+    * @see 3DSTATE_CLEAR_PARAMS.DepthClearValue
      */
     uint32_t depth_clear_value;
  
-   /* Includes image offset tables:
-    */
+   /* Includes image offset tables: */
     struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
  
-   /* Offset into bo where miptree starts:
+   /**
+    * Offset into bo where the surface starts.
+    *
+    * @see intel_mipmap_tree::bo
+    *
+    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+    * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
+    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
+    * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
      */
     uint32_t offset;
  
     /**
      * \brief HiZ aux buffer
      *
-    * The hiz miptree contains the miptree's hiz buffer. To allocate the hiz
-    * buffer, use intel_miptree_alloc_hiz().
+    * To allocate the hiz buffer, use intel_miptree_alloc_hiz().
      *
      * To determine if hiz is enabled, do not check this pointer. Instead, use
      * intel_miptree_slice_has_hiz().
@@ -472,6 +607,7 @@ struct intel_mipmap_tree
      * require separate stencil.  It always has the true copy of the stencil
      * bits, regardless of mt->format.
      *
+    * \see 3DSTATE_STENCIL_BUFFER
      * \see intel_miptree_map_depthstencil()
      * \see intel_miptree_unmap_depthstencil()
      */
@@ -499,6 +635,11 @@ struct intel_mipmap_tree
      *
      * This value will only ever contain ones in bits 28-31, so it is safe to
      * OR into dword 7 of SURFACE_STATE.
+    *
+    * @see RENDER_SURFACE_STATE.RedClearColor
+    * @see RENDER_SURFACE_STATE.GreenClearColor
+    * @see RENDER_SURFACE_STATE.BlueClearColor
+    * @see RENDER_SURFACE_STATE.AlphaClearColor
      */
     uint32_t fast_clear_color_value;
  
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp

index ba67bc59e19206533142253a97c788ac39b576f2..8adb626d420ce7f29094d1f8338211512bd2ed23 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -46,10 +46,10 @@ class cmod_propagation_fs_visitor : public fs_visitor
  public:
     cmod_propagation_fs_visitor(struct brw_compiler *compiler,
                                 struct brw_wm_prog_data *prog_data,
-                               struct gl_shader_program *shader_prog)
-      : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
-                   &prog_data->base, shader_prog,
-                   (struct gl_program *) NULL, 8, -1) {}
+                               nir_shader *shader)
+      : fs_visitor(compiler, NULL, NULL, NULL,
+                   &prog_data->base, (struct gl_program *) NULL,
+                   shader, 8, -1) {}
  };
  
  
@@ -62,9 +62,9 @@ void cmod_propagation_test::SetUp()
  
     fp = ralloc(NULL, struct brw_fragment_program);
     prog_data = ralloc(NULL, struct brw_wm_prog_data);
-   shader_prog = ralloc(NULL, struct gl_shader_program);
+   nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
  
-   v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog);
+   v = new cmod_propagation_fs_visitor(compiler, prog_data, shader);
  
     _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
  
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp

index 1caa0b50ec60052cb4cd9b95e76cb707a82f2ee0..f77b18e7db8711ffc01977665da79fd034ab04c8 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -46,10 +46,10 @@ class saturate_propagation_fs_visitor : public fs_visitor
  public:
     saturate_propagation_fs_visitor(struct brw_compiler *compiler,
                                     struct brw_wm_prog_data *prog_data,
-                                   struct gl_shader_program *shader_prog)
-      : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
-                   &prog_data->base, shader_prog,
-                   (struct gl_program *) NULL, 8, -1) {}
+                                   nir_shader *shader)
+      : fs_visitor(compiler, NULL, NULL, NULL,
+                   &prog_data->base, (struct gl_program *) NULL,
+                   shader, 8, -1) {}
  };
  
  
@@ -62,9 +62,9 @@ void saturate_propagation_test::SetUp()
  
     fp = ralloc(NULL, struct brw_fragment_program);
     prog_data = ralloc(NULL, struct brw_wm_prog_data);
-   shader_prog = ralloc(NULL, struct gl_shader_program);
+   nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
  
-   v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog);
+   v = new saturate_propagation_fs_visitor(compiler, prog_data, shader);
  
     _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
  
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp

index fbd9fa8f19b6c9619aa8abe5eec1e44b23497428..40253961a6553463bfaead5b2c6f690d903f2517 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -45,9 +45,8 @@ class copy_propagation_vec4_visitor : public vec4_visitor
  {
  public:
     copy_propagation_vec4_visitor(struct brw_compiler *compiler,
-                                  struct gl_shader_program *shader_prog)
-      : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
-                     MESA_SHADER_VERTEX, NULL,
+                                 nir_shader *shader)
+      : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
                       false /* no_spills */, -1)
     {
     }
@@ -69,11 +68,6 @@ protected:
        unreachable("Not reached");
     }
  
-   virtual void emit_program_code()
-   {
-      unreachable("Not reached");
-   }
-
     virtual void emit_thread_end()
     {
        unreachable("Not reached");
@@ -100,9 +94,9 @@ void copy_propagation_test::SetUp()
  
     vp = ralloc(NULL, struct brw_vertex_program);
  
-   shader_prog = ralloc(NULL, struct gl_shader_program);
+   nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
  
-   v = new copy_propagation_vec4_visitor(compiler, shader_prog);
+   v = new copy_propagation_vec4_visitor(compiler, shader);
  
     _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
  
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp

index a3055fcc8516421d992403db05eb3507d11a3bd9..76028d3631105f51664c8221189746ce779d03aa 100644 (file)
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -48,9 +48,8 @@ class register_coalesce_vec4_visitor : public vec4_visitor
  {
  public:
     register_coalesce_vec4_visitor(struct brw_compiler *compiler,
-                                  struct gl_shader_program *shader_prog)
-      : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
-                     MESA_SHADER_VERTEX, NULL,
+                                  nir_shader *shader)
+      : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
                       false /* no_spills */, -1)
     {
     }
@@ -72,11 +71,6 @@ protected:
        unreachable("Not reached");
     }
  
-   virtual void emit_program_code()
-   {
-      unreachable("Not reached");
-   }
-
     virtual void emit_thread_end()
     {
        unreachable("Not reached");
@@ -103,9 +97,9 @@ void register_coalesce_test::SetUp()
  
     vp = ralloc(NULL, struct brw_vertex_program);
  
-   shader_prog = ralloc(NULL, struct gl_shader_program);
+   nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
  
-   v = new register_coalesce_vec4_visitor(compiler, shader_prog);
+   v = new register_coalesce_vec4_visitor(compiler, shader);
  
     _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
  
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c

index 8bc3bcd25a0d1a7d9cd82f669e3fdc52324489c1..53e7a500f6136c51b74dbfd517111be8b134ef9c 100644 (file)
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -50,7 +50,7 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
     GET_CURRENT_CONTEXT(ctx);
  
     if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect);
+      _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
  
     if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
        return;
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp

index c6828925f5e7a9d78a8da98188fe44af095974fe..c0030bc56878a5eb4326bf87786795b27fd9d752 100644 (file)
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -27,29 +27,27 @@
   * 
   **************************************************************************/
  
-#include "glheader.h"
-#include "imports.h"
-#include "mtypes.h"
+#include "main/glheader.h"
  #include "main/context.h"
+#include "main/imports.h"
  #include "main/macros.h"
  #include "main/samplerobj.h"
  #include "main/texenvprogram.h"
  #include "main/texobj.h"
  #include "main/uniforms.h"
+#include "glsl/ir_builder.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/glsl_symbol_table.h"
+#include "glsl/glsl_types.h"
+#include "program/ir_to_mesa.h"
  #include "program/program.h"
-#include "program/prog_parameter.h"
+#include "program/programopt.h"
  #include "program/prog_cache.h"
  #include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
  #include "program/prog_print.h"
  #include "program/prog_statevars.h"
-#include "program/programopt.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir.h"
-#include "../glsl/ir_builder.h"
-#include "../glsl/glsl_symbol_table.h"
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir_optimization.h"
-#include "../program/ir_to_mesa.h"
  
  using namespace ir_builder;
  
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c

index 95b428dca3eb87ce0c8e452ef77d4bf3ac3464f9..a6183b47e2e4a86dd13f166bdd5a6b20370d9f72 100644 (file)
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -293,9 +293,10 @@ struct ureg {
     GLuint file:4;
     GLint idx:9;      /* relative addressing may be negative */
                       /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
+   GLuint abs:1;
     GLuint negate:1;
     GLuint swz:12;
-   GLuint pad:6;
+   GLuint pad:5;
  };
  
  
@@ -324,6 +325,7 @@ static const struct ureg undef = {
     0,
     0,
     0,
+   0,
     0
  };
  
@@ -342,6 +344,7 @@ static struct ureg make_ureg(GLuint file, GLint idx)
     struct ureg reg;
     reg.file = file;
     reg.idx = idx;
+   reg.abs = 0;
     reg.negate = 0;
     reg.swz = SWIZZLE_NOOP;
     reg.pad = 0;
@@ -350,6 +353,14 @@ static struct ureg make_ureg(GLuint file, GLint idx)
  
  
  
+static struct ureg absolute( struct ureg reg )
+{
+   reg.abs = 1;
+   reg.negate = 0;
+   return reg;
+}
+
+
  static struct ureg negate( struct ureg reg )
  {
     reg.negate ^= 1;
@@ -526,8 +537,8 @@ static void emit_arg( struct prog_src_register *src,
     src->File = reg.file;
     src->Index = reg.idx;
     src->Swizzle = reg.swz;
+   src->Abs = reg.abs;
     src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
-   src->Abs = 0;
     src->RelAddr = 0;
     /* Check that bitfield sizes aren't exceeded */
     assert(src->Index == reg.idx);
@@ -953,7 +964,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
  
        emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
        emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
-      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
+      emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
        emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
  
        release_temp(p, spot);
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h

index d61279ac4e5a8f5a2e99e5e13274b9d4cf30c42a..90247587be3c372c42ace681da2927d5904f7ff0 100644 (file)
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -423,6 +423,9 @@ _mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list arg);
  #define snprintf _snprintf
  #endif
  
+#if defined(_WIN32) && !defined(strtok_r)
+#define strtok_r strtok_s
+#endif
  
  #ifdef __cplusplus
  }
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c

index c2e1d29ad80b52ee71ca8c82bcd770cb68fcb512..51ee10ff8585383fee3ec753ddcc218818b4d3a9 100644 (file)
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -31,6 +31,7 @@
   * GL_ARB_separate_shader_objects extension.
   */
  
+#include <stdbool.h>
  #include "main/glheader.h"
  #include "main/context.h"
  #include "main/dispatch.h"
@@ -42,12 +43,11 @@
  #include "main/shaderobj.h"
  #include "main/transformfeedback.h"
  #include "main/uniforms.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/ir_uniform.h"
  #include "program/program.h"
  #include "program/prog_parameter.h"
  #include "util/ralloc.h"
-#include <stdbool.h>
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir_uniform.h"
  
  /**
   * Delete a pipeline object.
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp

index 73dee85cc4dfb2d3475ff0fd3d44573fc0af8892..718967605b586ed5b0d6f5b56cfb60d4bc0228b8 100644 (file)
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -611,11 +611,10 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
        return GL_INVALID_INDEX;
  
     switch (res->Type) {
-   case GL_UNIFORM_BLOCK:
-   case GL_SHADER_STORAGE_BLOCK:
-      return RESOURCE_UBO(res)- shProg->UniformBlocks;
     case GL_ATOMIC_COUNTER_BUFFER:
        return RESOURCE_ATC(res) - shProg->AtomicBuffers;
+   case GL_UNIFORM_BLOCK:
+   case GL_SHADER_STORAGE_BLOCK:
     case GL_TRANSFORM_FEEDBACK_VARYING:
     default:
        return calc_resource_index(shProg, res);
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c

index a1be1e3304233dee63a083f522fa95be7e075979..173e43c817c2d0f558c714a752a88420579324e8 100644 (file)
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -1192,18 +1192,18 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
  struct gl_texture_object *
  _mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
  {
-      struct gl_texture_object *texObj = NULL;
-      GLint targetIndex;
+   struct gl_texture_object *texObj = NULL;
+   GLint targetIndex;
  
-      if (target == 0)
-         return texObj;
+   if (target == 0)
+      return texObj;
  
-      texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
-      targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
-      assert(targetIndex < NUM_TEXTURE_TARGETS);
-      texObj->TargetIndex = targetIndex;
+   texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
+   targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
+   assert(targetIndex < NUM_TEXTURE_TARGETS);
+   texObj->TargetIndex = targetIndex;
  
-      return texObj;
+   return texObj;
  }
  
  /**
@@ -1622,25 +1622,79 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target)
  
  
  /**
- * Bind a named texture to a texturing target.
+ * Do actual texture binding.  All error checking should have been done prior
+ * to calling this function.  Note that the texture target (1D, 2D, etc) is
+ * always specified by the texObj->TargetIndex.
+ *
+ * \param unit  index of texture unit to update
+ * \param texObj  the new texture object (cannot be NULL)
+ */
+static void
+bind_texture(struct gl_context *ctx,
+             unsigned unit,
+             struct gl_texture_object *texObj)
+{
+   struct gl_texture_unit *texUnit;
+   int targetIndex;
+
+   assert(unit < ARRAY_SIZE(ctx->Texture.Unit));
+   texUnit = &ctx->Texture.Unit[unit];
+
+   assert(texObj);
+   assert(valid_texture_object(texObj));
+
+   targetIndex = texObj->TargetIndex;
+   assert(targetIndex >= 0);
+   assert(targetIndex < NUM_TEXTURE_TARGETS);
+
+   /* Check if this texture is only used by this context and is already bound.
+    * If so, just return.
+    */
+   {
+      bool early_out;
+      mtx_lock(&ctx->Shared->Mutex);
+      early_out = ((ctx->Shared->RefCount == 1)
+                   && (texObj == texUnit->CurrentTex[targetIndex]));
+      mtx_unlock(&ctx->Shared->Mutex);
+      if (early_out) {
+         return;
+      }
+   }
+
+   /* flush before changing binding */
+   FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+
+   /* If the refcount on the previously bound texture is decremented to
+    * zero, it'll be deleted here.
+    */
+   _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], texObj);
+
+   ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
+                                         unit + 1);
+
+   if (texObj->Name != 0)
+      texUnit->_BoundTextures |= (1 << targetIndex);
+   else
+      texUnit->_BoundTextures &= ~(1 << targetIndex);
+
+   /* Pass BindTexture call to device driver */
+   if (ctx->Driver.BindTexture) {
+      ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
+   }
+}
+
+
+/**
+ * Implement glBindTexture().  Do error checking, look-up or create a new
+ * texture object, then bind it in the current texture unit.
   *
   * \param target texture target.
   * \param texName texture name.
- *
- * \sa glBindTexture().
- *
- * Determines the old texture object bound and returns immediately if rebinding
- * the same texture.  Get the current texture which is either a default texture
- * if name is null, a named texture from the hash, or a new texture if the
- * given texture name is new. Increments its reference count, binds it, and
- * calls dd_function_table::BindTexture. Decrements the old texture reference
- * count and deletes it if it reaches zero.
   */
  void GLAPIENTRY
  _mesa_BindTexture( GLenum target, GLuint texName )
  {
     GET_CURRENT_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
     struct gl_texture_object *newTexObj = NULL;
     GLint targetIndex;
  
@@ -1702,95 +1756,12 @@ _mesa_BindTexture( GLenum target, GLuint texName )
        newTexObj->TargetIndex = targetIndex;
     }
  
-   assert(valid_texture_object(newTexObj));
-
-   /* Check if this texture is only used by this context and is already bound.
-    * If so, just return.
-    */
-   {
-      GLboolean early_out;
-      mtx_lock(&ctx->Shared->Mutex);
-      early_out = ((ctx->Shared->RefCount == 1)
-                   && (newTexObj == texUnit->CurrentTex[targetIndex]));
-      mtx_unlock(&ctx->Shared->Mutex);
-      if (early_out) {
-         return;
-      }
-   }
-
-   /* flush before changing binding */
-   FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-
-   /* Do the actual binding.  The refcount on the previously bound
-    * texture object will be decremented.  It'll be deleted if the
-    * count hits zero.
-    */
-   _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], newTexObj);
-   ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
-                                         ctx->Texture.CurrentUnit + 1);
-   assert(texUnit->CurrentTex[targetIndex]);
-
-   if (texName != 0)
-      texUnit->_BoundTextures |= (1 << targetIndex);
-   else
-      texUnit->_BoundTextures &= ~(1 << targetIndex);
-
-   /* Pass BindTexture call to device driver */
-   if (ctx->Driver.BindTexture)
-      ctx->Driver.BindTexture(ctx, ctx->Texture.CurrentUnit, target, newTexObj);
+   bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj);
  }
  
-/**
- * Do the actual binding to a numbered texture unit.
- * The refcount on the previously bound
- * texture object will be decremented.  It'll be deleted if the
- * count hits zero.
- */
-static void
-bind_texture_unit(struct gl_context *ctx,
-                  GLuint unit,
-                  struct gl_texture_object *texObj)
-{
-   struct gl_texture_unit *texUnit;
-
-   /* Get the texture unit (this is an array look-up) */
-   texUnit = _mesa_get_tex_unit_err(ctx, unit, "glBindTextureUnit");
-   if (!texUnit)
-      return;
-
-   /* Check if this texture is only used by this context and is already bound.
-    * If so, just return.
-    */
-   {
-      bool early_out;
-      mtx_lock(&ctx->Shared->Mutex);
-      early_out = ((ctx->Shared->RefCount == 1)
-                   && (texObj == texUnit->CurrentTex[texObj->TargetIndex]));
-      mtx_unlock(&ctx->Shared->Mutex);
-      if (early_out) {
-         return;
-      }
-   }
-
-   /* flush before changing binding */
-   FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-
-   _mesa_reference_texobj(&texUnit->CurrentTex[texObj->TargetIndex],
-                          texObj);
-   assert(texUnit->CurrentTex[texObj->TargetIndex]);
-   ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
-                                         unit + 1);
-   texUnit->_BoundTextures |= (1 << texObj->TargetIndex);
-
-
-   /* Pass BindTexture call to device driver */
-   if (ctx->Driver.BindTexture) {
-      ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
-   }
-}
  
  /**
- * Bind a named texture to the specified texture unit.
+ * OpenGL 4.5 / GL_ARB_direct_state_access glBindTextureUnit().
   *
   * \param unit texture unit.
   * \param texture texture name.
@@ -1807,6 +1778,18 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
  {
     GET_CURRENT_CONTEXT(ctx);
     struct gl_texture_object *texObj;
+   struct gl_texture_unit *texUnit;
+
+   if (unit >= _mesa_max_tex_unit(ctx)) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit);
+      return;
+   }
+
+   texUnit = _mesa_get_tex_unit(ctx, unit);
+   assert(texUnit);
+   if (!texUnit) {
+      return;
+   }
  
     if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
        _mesa_debug(ctx, "glBindTextureUnit %s %d\n",
@@ -1833,15 +1816,19 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
        return;
     }
     if (texObj->Target == 0) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glBindTextureUnit(target)");
+      /* Texture object was gen'd but never bound so the target is not set */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBindTextureUnit(target)");
        return;
     }
     assert(valid_texture_object(texObj));
  
-   bind_texture_unit(ctx, unit, texObj);
+   bind_texture(ctx, unit, texObj);
  }
  
  
+/**
+ * OpenGL 4.4 / GL_ARB_multi_bind glBindTextures().
+ */
  void GLAPIENTRY
  _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
  {
@@ -1862,12 +1849,6 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
        return;
     }
  
-   /* Flush before changing bindings */
-   FLUSH_VERTICES(ctx, 0);
-
-   ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
-                                         first + count);
-
     if (textures) {
        /* Note that the error semantics for multi-bind commands differ from
         * those of other GL commands.
@@ -1902,24 +1883,7 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
                 texObj = _mesa_lookup_texture_locked(ctx, textures[i]);
  
              if (texObj && texObj->Target != 0) {
-               const gl_texture_index targetIndex = texObj->TargetIndex;
-
-               if (texUnit->CurrentTex[targetIndex] != texObj) {
-                  /* Do the actual binding.  The refcount on the previously
-                   * bound texture object will be decremented.  It will be
-                   * deleted if the count hits zero.
-                   */
-                  _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex],
-                                         texObj);
-
-                  texUnit->_BoundTextures |= (1 << targetIndex);
-                  ctx->NewState |= _NEW_TEXTURE;
-
-                  /* Pass the BindTexture call to the device driver */
-                  if (ctx->Driver.BindTexture)
-                     ctx->Driver.BindTexture(ctx, first + i,
-                                             texObj->Target, texObj);
-               }
+               bind_texture(ctx, first + i, texObj);
              } else {
                 /* The ARB_multi_bind spec says:
                  *
diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h

index bee8c9c3316998581238d9a08aee39eb1e6246ee..52fe60275c23095d273c9aeb2d9a4659062fe1d9 100644 (file)
--- a/src/mesa/main/texstate.h
+++ b/src/mesa/main/texstate.h
@@ -63,24 +63,6 @@ _mesa_max_tex_unit(struct gl_context *ctx)
                 ctx->Const.MaxTextureCoordUnits);
  }
  
-static inline struct gl_texture_unit *
-_mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func)
-{
-   if (unit < _mesa_max_tex_unit(ctx))
-      return _mesa_get_tex_unit(ctx, unit);
-
-   /* Note: This error is a precedent set by glBindTextures. From the GL 4.5
-    * specification (30.10.2014) Section 8.1 ("Texture Objects"):
-    *
-    *    "An INVALID_OPERATION error is generated if first + count is greater
-    *     than the number of texture image units supported by the
-    *     implementation."
-    */
-   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
-               _mesa_enum_to_string(GL_TEXTURE0+unit));
-   return NULL;
-}
-
  
  extern void
  _mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst );
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp

index 0bee59455a3697bc8dad29fa1d29c8e387201256..33c959dc1a5583f3b2faf6804cf129286a5b115a 100644 (file)
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -28,15 +28,14 @@
  
  #include "main/core.h"
  #include "main/context.h"
-#include "ir.h"
-#include "ir_uniform.h"
-#include "program/hash_table.h"
-#include "../glsl/program.h"
-#include "../glsl/ir_uniform.h"
-#include "../glsl/glsl_parser_extras.h"
  #include "main/shaderapi.h"
  #include "main/shaderobj.h"
-#include "uniforms.h"
+#include "main/uniforms.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/program.h"
+#include "program/hash_table.h"
  
  
  extern "C" void GLAPIENTRY
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h

index 96172b72aa13c3440a83c6a4143622108869d585..bec035cdc97cf20e367660b4603f232c63204eff 100644 (file)
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -26,10 +26,10 @@
  #ifndef UNIFORMS_H
  #define UNIFORMS_H
  
-#include "glheader.h"
+#include "main/glheader.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_uniform.h"
  #include "program/prog_parameter.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_uniform.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h

index e85a836a00bd4ea16d6ddcae0a9a0287e95d7cd7..d0a2abffa343c8b229acefcc8d08aba2efb9f19a 100644 (file)
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -249,6 +249,7 @@ public:
        wrapper->closure = closure;
  
        hash_table_call_foreach(this->ht, subtract_one_wrapper, wrapper);
+      free(wrapper);
     }
  
     /**
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp

index 1cfcf9119f97c2508219a0fc7508b826a05d1b92..98032456662667d6ddcd716853107709e2428a23 100644 (file)
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -31,22 +31,20 @@
  
  #include <stdio.h>
  #include "main/compiler.h"
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_expression_flattening.h"
-#include "ir_uniform.h"
-#include "glsl_types.h"
-#include "glsl_parser_extras.h"
-#include "../glsl/program.h"
-#include "ir_optimization.h"
-#include "ast.h"
-#include "linker.h"
-
  #include "main/mtypes.h"
  #include "main/shaderapi.h"
  #include "main/shaderobj.h"
  #include "main/uniforms.h"
-
+#include "glsl/ast.h"
+#include "glsl/ir.h"
+#include "glsl/ir_expression_flattening.h"
+#include "glsl/ir_visitor.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/glsl_types.h"
+#include "glsl/linker.h"
+#include "glsl/program.h"
  #include "program/hash_table.h"
  #include "program/prog_instruction.h"
  #include "program/prog_optimize.h"
diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c

index 1bd735a47bb3fcf848e8c4ebc5b3e43ddea9c10b..fc00534028fa87958e6092005c71bcd15a7e3c06 100644 (file)
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -1122,6 +1122,19 @@ prog_to_nir(const struct gl_program *prog,
  
     ptn_add_output_stores(c);
  
+   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
+   s->info.num_textures = _mesa_fls(prog->SamplersUsed);
+   s->info.num_ubos = 0;
+   s->info.num_abos = 0;
+   s->info.num_ssbos = 0;
+   s->info.num_images = 0;
+   s->info.inputs_read = prog->InputsRead;
+   s->info.outputs_written = prog->OutputsWritten;
+   s->info.system_values_read = prog->SystemValuesRead;
+   s->info.uses_texture_gather = false;
+   s->info.uses_clip_distance_out = false;
+   s->info.separate_shader = false;
+
  fail:
     if (c->error) {
        ralloc_free(s);
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp

index ea3024d512ae6d0a979950e4a6b93b7fa5ee1c6f..b1168fdade8d0d996ee7588066e93c45a2494a9d 100644 (file)
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -23,13 +23,12 @@
   * DEALINGS IN THE SOFTWARE.
   */
  
-#include "ir.h"
-#include "glsl_types.h"
-#include "ir_visitor.h"
-#include "../glsl/program.h"
-#include "ir_uniform.h"
-
  #include "main/mtypes.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/ir_visitor.h"
+#include "glsl/program.h"
  #include "program/hash_table.h"
  #include "program/prog_parameter.h"
  #include "program/program.h"
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c

index 2ad679b1cb8e5dff3f5f357f650f75ce2a268ca1..f4b273bf93fb65222285c8f7d5e4d976b201f5be 100644 (file)
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -42,6 +42,8 @@
  #include "main/macros.h"
  #include "main/varray.h"
  
+#include "glsl/ir_uniform.h"
+
  #include "vbo/vbo.h"
  
  #include "st_context.h"
@@ -62,8 +64,6 @@
  #include "draw/draw_context.h"
  #include "cso_cache/cso_context.h"
  
-#include "../glsl/ir_uniform.h"
-
  
  /**
   * This is very similar to vbo_all_varyings_in_vbos() but we are
diff --git a/src/util/ralloc.c b/src/util/ralloc.c

index 01719c888b1023b958a58957ec2dcdd3d96ca798..e07fce74f23d7b95c4f59961d7ddd420eec0602f 100644 (file)
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -359,10 +359,7 @@ ralloc_strndup(const void *ctx, const char *str, size_t max)
     if (unlikely(str == NULL))
        return NULL;
  
-   n = strlen(str);
-   if (n > max)
-      n = max;
-
+   n = strnlen(str, max);
     ptr = ralloc_array(ctx, char, n + 1);
     memcpy(ptr, str, n);
     ptr[n] = '\0';
diff --git a/src/util/strndup.c b/src/util/strndup.c

index ca1c6f53b575bb5e722944c568952c08df9680cd..5ceb32fe4743d0d46f0453efb6dae69964190922 100644 (file)
--- a/src/util/strndup.c
+++ b/src/util/strndup.c
@@ -35,10 +35,7 @@ strndup(const char *str, size_t max)
     if (!str)
        return NULL;
  
-   n = strlen(str);
-   if (n > max)
-      n = max;
-
+   n = strnlen(str, max);
     ptr = (char *) calloc(n + 1, sizeof(char));
     if (!ptr)
        return NULL;
diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp

index b06775d05ca0cc4623ce218b9a74438e9e4ec654..4a00863b71847adb25fee4c4ff3640cbf6e00192 100644 (file)
--- a/src/vulkan/anv_compiler.cpp
+++ b/src/vulkan/anv_compiler.cpp
@@ -883,6 +883,9 @@ setup_nir_io(struct gl_shader *mesa_shader,
        prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
     }
  
+   shader->info.inputs_read = prog->InputsRead;
+   shader->info.outputs_written = prog->OutputsWritten;
+
     mesa_shader->num_uniform_components = shader->num_uniforms;
  }
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 2 Oct 2015 23:45:48 +0000 (16:45 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 2 Oct 2015 23:52:47 +0000 (16:52 -0700)
src/glsl/ast_array_index.cpp		patch \| blob \| history
src/glsl/ast_to_hir.cpp		patch \| blob \| history
src/glsl/glsl_types.cpp		patch \| blob \| history
src/glsl/link_uniforms.cpp		patch \| blob \| history
src/glsl/linker.cpp		patch \| blob \| history
src/glsl/lower_ubo_reference.cpp		patch \| blob \| history
src/glsl/nir/glsl_to_nir.cpp		patch \| blob \| history
src/glsl/nir/glsl_to_nir.h		patch \| blob \| history
src/glsl/nir/nir.c		patch \| blob \| history
src/glsl/nir/nir.h		patch \| blob \| history
src/glsl/nir/nir_intrinsics.h		patch \| blob \| history
src/glsl/nir/nir_lower_gs_intrinsics.c		patch \| blob \| history
src/glsl/nir/nir_lower_io.c		patch \| blob \| history
src/glsl/nir/nir_lower_system_values.c		patch \| blob \| history
src/glsl/nir/nir_opt_remove_phis.c		patch \| blob \| history
src/glsl/shader_enums.h		patch \| blob \| history
src/glx/Makefile.am		patch \| blob \| history
src/glx/dri_common.c		patch \| blob \| history
src/glx/glxcmds.c		patch \| blob \| history
src/glx/glxextensions.h		patch \| blob \| history
src/mapi/mapi_glapi.c		patch \| blob \| history
src/mesa/drivers/common/meta_blit.c		patch \| blob \| history
src/mesa/drivers/dri/i915/i830_context.h		patch \| blob \| history
src/mesa/drivers/dri/i915/i915_context.h		patch \| blob \| history
src/mesa/drivers/dri/i915/i915_fragprog.c		patch \| blob \| history
src/mesa/drivers/dri/i915/intel_fbo.c		patch \| blob \| history
src/mesa/drivers/dri/i965/Makefile.sources		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_context.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_context.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_cs.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_cs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_nir.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_gs.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp	[new file with mode: 0644]	patch \| blob
src/mesa/drivers/dri/i965/brw_program.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_program.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_shader.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_shader.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_state.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_state_cache.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_state_upload.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_tex_layout.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_vp.cpp	[deleted file]	patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vs.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vs.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm_iz.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_wm_surface_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_blorp.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_gs_visitor.h		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_surface_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_blorp.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_cs_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen8_surface_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_copy_image.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.h		patch \| blob \| history
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp		patch \| blob \| history
src/mesa/main/compute.c		patch \| blob \| history
src/mesa/main/ff_fragment_shader.cpp		patch \| blob \| history
src/mesa/main/ffvertex_prog.c		patch \| blob \| history
src/mesa/main/imports.h		patch \| blob \| history
src/mesa/main/pipelineobj.c		patch \| blob \| history
src/mesa/main/shader_query.cpp		patch \| blob \| history
src/mesa/main/texobj.c		patch \| blob \| history
src/mesa/main/texstate.h		patch \| blob \| history
src/mesa/main/uniform_query.cpp		patch \| blob \| history
src/mesa/main/uniforms.h		patch \| blob \| history
src/mesa/program/hash_table.h		patch \| blob \| history
src/mesa/program/ir_to_mesa.cpp		patch \| blob \| history
src/mesa/program/prog_to_nir.c		patch \| blob \| history
src/mesa/program/sampler.cpp		patch \| blob \| history
src/mesa/state_tracker/st_draw.c		patch \| blob \| history
src/util/ralloc.c		patch \| blob \| history
src/util/strndup.c		patch \| blob \| history
src/vulkan/anv_compiler.cpp		patch \| blob \| history