glsl_to_tgsi: Use the GLSL compiler's new remove-output-reads pass.
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
index 35fd1ffc13ae20d10ed6057b6ab0dcd21443a760..1b956a77f53be6bdc05817142136f70ab0bf4bbb 100644 (file)
@@ -53,7 +53,6 @@ extern "C" {
 #include "program/prog_optimize.h"
 #include "program/prog_print.h"
 #include "program/program.h"
-#include "program/prog_uniform.h"
 #include "program/prog_parameter.h"
 #include "program/sampler.h"
 
@@ -79,6 +78,12 @@ extern "C" {
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
 
+/**
+ * Maximum number of temporary registers.
+ *
+ * It is too big for stack allocated arrays -- it will cause stack overflow on
+ * Windows and likely Mac OS X.
+ */
 #define MAX_TEMPS         4096
 
 /* will be 4 for GLSL 4.00 */
@@ -299,6 +304,7 @@ public:
    int samplers_used;
    bool indirect_addr_temps;
    bool indirect_addr_consts;
+   int num_clip_distances;
    
    int glsl_version;
    bool native_integers;
@@ -1012,29 +1018,6 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
 
       fp->OriginUpperLeft = ir->origin_upper_left;
       fp->PixelCenterInteger = ir->pixel_center_integer;
-
-   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
-      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
-      switch (ir->depth_layout) {
-      case ir_depth_layout_none:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
-         break;
-      case ir_depth_layout_any:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
-         break;
-      case ir_depth_layout_greater:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
-         break;
-      case ir_depth_layout_less:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
-         break;
-      case ir_depth_layout_unchanged:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
-         break;
-      default:
-         assert(0);
-         break;
-      }
    }
 
    if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
@@ -1799,6 +1782,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_floor:
       emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       break;
+   case ir_unop_round_even:
+      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
+      break;
    case ir_unop_fract:
       emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
       break;
@@ -1825,30 +1811,30 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       }
    case ir_binop_lshift:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_rshift:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_and:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_xor:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_or:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
          break;
       }
-   case ir_unop_round_even:
+
       assert(!"GLSL 1.30 features unsupported");
       break;
 
@@ -2725,6 +2711,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    case GLSL_SAMPLER_DIM_BUF:
       assert(!"FINISHME: Implement ARB_texture_buffer_object");
       break;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+      break;
    default:
       assert(!"Should not get here.");
    }
@@ -2865,55 +2854,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
    _mesa_update_shader_textures_used(prog);
 }
 
-
-/**
- * Check if the given vertex/fragment/shader program is within the
- * resource limits of the context (number of texture units, etc).
- * If any of those checks fail, record a linker error.
- *
- * XXX more checks are needed...
- */
-static void
-check_resources(const struct gl_context *ctx,
-                struct gl_shader_program *shader_program,
-                glsl_to_tgsi_visitor *prog,
-                struct gl_program *proginfo)
-{
-   switch (proginfo->Target) {
-   case GL_VERTEX_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxVertexTextureImageUnits) {
-         fail_link(shader_program, "Too many vertex shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many vertex shader constants");
-      }
-      break;
-   case MESA_GEOMETRY_PROGRAM:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxGeometryTextureImageUnits) {
-         fail_link(shader_program, "Too many geometry shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters >
-          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
-         fail_link(shader_program, "Too many geometry shader constants");
-      }
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxTextureImageUnits) {
-         fail_link(shader_program, "Too many fragment shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many fragment shader constants");
-      }
-      break;
-   default:
-      _mesa_problem(ctx, "unexpected program type in check_resources()");
-   }
-}
-
-
 static void
 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
                        struct gl_shader_program *shader_program,
@@ -2975,12 +2915,12 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
                              element_type->matrix_columns,
                              element_type->vector_elements,
                              loc, 1, GL_FALSE, (GLfloat *)values);
-         loc += element_type->matrix_columns;
       } else {
          _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
                       values, element_type->gl_type);
-         loc += type_size(element_type);
       }
+
+      loc++;
    }
 }
 
@@ -3001,9 +2941,13 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    GLint outputMap[VERT_RESULT_MAX];
    GLint outputTypes[VERT_RESULT_MAX];
    GLuint numVaryingReads = 0;
-   GLboolean usedTemps[MAX_TEMPS];
+   GLboolean *usedTemps;
    GLuint firstTemp = 0;
 
+   usedTemps = new GLboolean[MAX_TEMPS];
+   if (!usedTemps) {
+      return;
+   }
    _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
                              usedTemps, MAX_TEMPS);
 
@@ -3036,6 +2980,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
       }
    }
 
+   delete [] usedTemps;
+
    if (numVaryingReads == 0)
       return; /* nothing to be done */
 
@@ -3107,9 +3053,13 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
 void
 glsl_to_tgsi_visitor::simplify_cmp(void)
 {
-   unsigned tempWrites[MAX_TEMPS];
+   unsigned *tempWrites;
    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
 
+   tempWrites = new unsigned[MAX_TEMPS];
+   if (!tempWrites) {
+      return;
+   }
    memset(tempWrites, 0, sizeof(tempWrites));
    memset(outputWrites, 0, sizeof(outputWrites));
 
@@ -3125,7 +3075,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
           inst->op == TGSI_OPCODE_END ||
           inst->op == TGSI_OPCODE_ENDSUB ||
           inst->op == TGSI_OPCODE_RET) {
-         return;
+         break;
       }
 
       if (inst->dst.file == PROGRAM_OUTPUT) {
@@ -3150,6 +3100,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
          inst->src[0] = inst->src[1];
       }
    }
+
+   delete [] tempWrites;
 }
 
 /* Replaces all references to a temporary register index with another index. */
@@ -3557,34 +3509,37 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
       switch (inst->op) {
       case TGSI_OPCODE_BGNLOOP:
       case TGSI_OPCODE_ENDLOOP:
+      case TGSI_OPCODE_CONT:
+      case TGSI_OPCODE_BRK:
          /* End of a basic block, clear the write array entirely.
-          * FIXME: This keeps us from killing dead code when the writes are
+          *
+          * This keeps us from killing dead code when the writes are
           * on either side of a loop, even when the register isn't touched
-          * inside the loop.
+          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
+          * dead code of this type, so it shouldn't make a difference as long as
+          * the dead code elimination pass in the GLSL compiler does its job.
           */
          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
          break;
 
       case TGSI_OPCODE_ENDIF:
-         --level;
-         break;
-
       case TGSI_OPCODE_ELSE:
-         /* Clear all channels written inside the preceding if block from the
-          * write array, but leave those that were not touched.
-          *
-          * FIXME: This destroys opportunities to remove dead code inside of
-          * IF blocks that are followed by an ELSE block.
+         /* Promote the recorded level of all channels written inside the
+          * preceding if or else block to the level above the if/else block.
           */
          for (int r = 0; r < this->next_temp; r++) {
             for (int c = 0; c < 4; c++) {
                if (!writes[4 * r + c])
                         continue;
 
-               if (write_level[4 * r + c] >= level)
-                        writes[4 * r + c] = NULL;
+               if (write_level[4 * r + c] == level)
+                        write_level[4 * r + c] = level-1;
             }
          }
+
+         if(inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         
          break;
 
       case TGSI_OPCODE_IF:
@@ -3762,6 +3717,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
    v->ctx = original->ctx;
    v->prog = prog;
+   v->shader_program = NULL;
    v->glsl_version = original->glsl_version;
    v->native_integers = original->native_integers;
    v->options = original->options;
@@ -3783,7 +3739,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    inst->sampler = 0;
    inst->tex_target = TEXTURE_2D_INDEX;
 
-   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->InputsRead |= FRAG_BIT_TEX0;
    prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
    v->samplers_used |= (1 << 0);
 
@@ -3840,6 +3796,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
     * new visitor. */
    foreach_iter(exec_list_iterator, iter, original->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *newinst;
       st_src_reg src_regs[3];
 
       if (inst->dst.file == PROGRAM_OUTPUT)
@@ -3854,10 +3811,11 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
             src_regs[i].index = src0.index;
          }
          else if (src_regs[i].file == PROGRAM_INPUT)
-            prog->InputsRead |= (1 << src_regs[i].index);
+            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst->tex_target = inst->tex_target;
    }
 
    /* Make modifications to fragment program info. */
@@ -3889,6 +3847,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
    v->ctx = original->ctx;
    v->prog = prog;
+   v->shader_program = NULL;
    v->glsl_version = original->glsl_version;
    v->native_integers = original->native_integers;
    v->options = original->options;
@@ -3907,7 +3866,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    inst->sampler = samplerIndex;
    inst->tex_target = TEXTURE_2D_INDEX;
 
-   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->InputsRead |= FRAG_BIT_TEX0;
    prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
    v->samplers_used |= (1 << samplerIndex);
 
@@ -3921,6 +3880,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
     * new visitor. */
    foreach_iter(exec_list_iterator, iter, original->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *newinst;
       st_src_reg src_regs[3];
 
       if (inst->dst.file == PROGRAM_OUTPUT)
@@ -3929,10 +3889,11 @@ get_bitmap_visitor(struct st_fragment_program *fp,
       for (int i=0; i<3; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT)
-            prog->InputsRead |= (1 << src_regs[i].index);
+            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst->tex_target = inst->tex_target;
    }
 
    /* Make modifications to fragment program info. */
@@ -3995,6 +3956,7 @@ struct st_translate {
 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
    TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_VERTEXID,
    TGSI_SEMANTIC_INSTANCEID
 };
 
@@ -4577,14 +4539,19 @@ st_translate_program(
    const ubyte outputSemanticIndex[],
    boolean passthrough_edgeflags)
 {
-   struct st_translate translate, *t;
+   struct st_translate *t;
    unsigned i;
    enum pipe_error ret = PIPE_OK;
 
    assert(numInputs <= Elements(t->inputs));
    assert(numOutputs <= Elements(t->outputs));
 
-   t = &translate;
+   t = CALLOC_STRUCT(st_translate);
+   if (!t) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+
    memset(t, 0, sizeof *t);
 
    t->procType = procType;
@@ -4594,6 +4561,15 @@ st_translate_program(
    t->pointSizeOutIndex = -1;
    t->prevInstWrotePointSize = GL_FALSE;
 
+   if (program->shader_program) {
+      for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
+         struct gl_uniform_storage *const storage =
+               &program->shader_program->UniformStorage[i];
+
+         _mesa_uniform_detach_all_driver_storage(storage);
+      }
+   }
+
    /*
     * Declare input attributes.
     */
@@ -4639,7 +4615,8 @@ st_translate_program(
             break;
          default:
             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
-            return PIPE_ERROR_BAD_INPUT;
+            ret = PIPE_ERROR_BAD_INPUT;
+            goto out;
          }
       }
    }
@@ -4665,9 +4642,17 @@ st_translate_program(
       }
 
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output(ureg,
-                                          outputSemanticName[i],
-                                          outputSemanticIndex[i]);
+         if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) {
+            int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW;
+            t->outputs[i] = ureg_DECL_output_masked(ureg,
+                                                    outputSemanticName[i],
+                                                    outputSemanticIndex[i],
+                                                    mask);
+         } else {
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             outputSemanticName[i],
+                                             outputSemanticIndex[i]);
+         }
          if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
             /* Writing to the point size result register requires special
              * handling to implement clamping.
@@ -4819,14 +4804,32 @@ st_translate_program(
                        t->insn[t->labels[i].branch_target]);
    }
 
+   if (program->shader_program) {
+      /* This has to be done last.  Any operation that can cause
+       * prog->ParameterValues to get reallocated (e.g., anything that adds a
+       * program constant) has to happen before creating this linkage.
+       */
+      for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+         if (program->shader_program->_LinkedShaders[i] == NULL)
+            continue;
+
+         _mesa_associate_uniform_storage(ctx, program->shader_program,
+               program->shader_program->_LinkedShaders[i]->Program->Parameters);
+      }
+   }
+
 out:
-   FREE(t->insn);
-   FREE(t->labels);
-   FREE(t->constants);
-   FREE(t->immediates);
+   if (t) {
+      FREE(t->insn);
+      FREE(t->labels);
+      FREE(t->constants);
+      FREE(t->immediates);
+
+      if (t->error) {
+         debug_printf("%s: translate error flag set\n", __FUNCTION__);
+      }
 
-   if (t->error) {
-      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+      FREE(t);
    }
 
    return ret;
@@ -4840,10 +4843,13 @@ out:
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
                  struct gl_shader_program *shader_program,
-                struct gl_shader *shader)
+                 struct gl_shader *shader,
+                 int num_clip_distances)
 {
    glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
    struct gl_program *prog;
+   struct pipe_screen * screen = st_context(ctx)->pipe->screen;
+   unsigned pipe_shader_type;
    GLenum target;
    const char *target_string;
    bool progress;
@@ -4854,14 +4860,17 @@ get_mesa_program(struct gl_context *ctx,
    case GL_VERTEX_SHADER:
       target = GL_VERTEX_PROGRAM_ARB;
       target_string = "vertex";
+      pipe_shader_type = PIPE_SHADER_VERTEX;
       break;
    case GL_FRAGMENT_SHADER:
       target = GL_FRAGMENT_PROGRAM_ARB;
       target_string = "fragment";
+      pipe_shader_type = PIPE_SHADER_FRAGMENT;
       break;
    case GL_GEOMETRY_SHADER:
       target = GL_GEOMETRY_PROGRAM_NV;
       target_string = "geometry";
+      pipe_shader_type = PIPE_SHADER_GEOMETRY;
       break;
    default:
       assert(!"should not be reached");
@@ -4880,10 +4889,18 @@ get_mesa_program(struct gl_context *ctx,
    v->options = options;
    v->glsl_version = ctx->Const.GLSLVersion;
    v->native_integers = ctx->Const.NativeIntegers;
+   v->num_clip_distances = num_clip_distances;
 
    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);
 
+   if (!screen->get_shader_param(screen, pipe_shader_type,
+                                 PIPE_SHADER_CAP_OUTPUT_READ)) {
+      /* Remove reads to output registers, and to varyings in vertex shaders. */
+      lower_output_reads(shader->ir);
+   }
+
+
    /* Emit intermediate IR for main(). */
    visit_exec_list(shader->ir, v);
 
@@ -4930,11 +4947,6 @@ get_mesa_program(struct gl_context *ctx,
    }
 #endif
 
-   /* Remove reads to output registers, and to varyings in vertex shaders. */
-   v->remove_output_reads(PROGRAM_OUTPUT);
-   if (target == GL_VERTEX_PROGRAM_ARB)
-      v->remove_output_reads(PROGRAM_VARYING);
-   
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->simplify_cmp();
    v->copy_propagate();
@@ -4961,18 +4973,26 @@ get_mesa_program(struct gl_context *ctx,
       _mesa_print_ir(shader->ir, NULL);
       printf("\n");
       printf("\n");
+      fflush(stdout);
    }
 
    prog->Instructions = NULL;
    prog->NumInstructions = 0;
 
-   do_set_program_inouts(shader->ir, prog);
+   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
    count_resources(v, prog);
 
-   check_resources(ctx, shader_program, v, prog);
-
    _mesa_reference_program(ctx, &shader->Program, prog);
    
+   /* This has to be done last.  Any operation that can cause
+    * prog->ParameterValues to get reallocated (e.g., anything that adds a
+    * program constant) has to happen before creating this linkage.
+    */
+   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
+   if (!shader_program->LinkStatus) {
+      return NULL;
+   }
+
    struct st_vertex_program *stvp;
    struct st_fragment_program *stfp;
    struct st_geometry_program *stgp;
@@ -4998,6 +5018,25 @@ get_mesa_program(struct gl_context *ctx,
    return prog;
 }
 
+/**
+ * Look up the gl_ClipDistance declaration among the instructions in \p ir.
+ *
+ * \param ir  instruction list to scan
+ * \return the length of the declared variable's type, or 0 when the list
+ *         holds no variable named gl_ClipDistance.
+ */
+int get_clip_distance_size(exec_list *ir)
+{
+   foreach_iter (exec_list_iterator, it, *ir) {
+      ir_variable *const decl = ((ir_instruction *)it.get())->as_variable();
+
+      /* Skip non-variables and variables with any other name. */
+      if (decl != NULL && strcmp(decl->name, "gl_ClipDistance") == 0)
+         return decl->type->length;
+   }
+   return 0;
+}
+
 extern "C" {
 
 struct gl_shader *
@@ -5036,6 +5075,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
+   int num_clip_distances[MESA_SHADER_TYPES];
    assert(prog->LinkStatus);
 
    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
@@ -5047,6 +5087,11 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       const struct gl_shader_compiler_options *options =
             &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
 
+      /* We have to determine the length of the gl_ClipDistance array before
+       * the array is lowered to two vec4s by lower_clip_distance().
+       */
+      num_clip_distances[i] = get_clip_distance_size(ir);
+
       do {
          progress = false;
 
@@ -5058,9 +5103,12 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
 
-         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+         progress = do_common_optimization(ir, true, true,
+                                          options->MaxUnrollIterations)
+          || progress;
 
          progress = lower_quadop_vector(ir, false) || progress;
+         progress = lower_clip_distance(ir) || progress;
 
          if (options->MaxIfDepth == 0)
             progress = lower_discard(ir) || progress;
@@ -5095,7 +5143,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       if (prog->_LinkedShaders[i] == NULL)
          continue;
 
-      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i],
+                                     num_clip_distances[i]);
 
       if (linked_prog) {
         static const GLenum targets[] = {
@@ -5120,4 +5169,32 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
    return GL_TRUE;
 }
 
+/**
+ * Fill in \p so from the transform feedback info of the visitor's linked
+ * shader program, remapping each output register through \p outputMapping.
+ */
+void
+st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi,
+                                const GLuint outputMapping[],
+                                struct pipe_stream_output_info *so)
+{
+   /* comps_to_mask[N] is the write mask of the first N components. */
+   static unsigned comps_to_mask[] = {
+      0, TGSI_WRITEMASK_X, TGSI_WRITEMASK_XY,
+      TGSI_WRITEMASK_XYZ, TGSI_WRITEMASK_XYZW
+   };
+   struct gl_transform_feedback_info *xfb =
+      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
+
+   so->num_outputs = xfb->NumOutputs;
+   for (unsigned slot = 0; slot < xfb->NumOutputs; slot++) {
+      assert(xfb->Outputs[slot].NumComponents < Elements(comps_to_mask));
+      so->output[slot].register_index =
+         outputMapping[xfb->Outputs[slot].OutputRegister];
+      so->output[slot].register_mask = comps_to_mask[xfb->Outputs[slot].NumComponents]
+         << xfb->Outputs[slot].ComponentOffset;
+      so->output[slot].output_buffer = xfb->Outputs[slot].OutputBuffer;
+   }
+}
+
 } /* extern "C" */