glsl_to_tgsi: Remove st_new_shader

[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 575da1eaf8f7653f9dc7ce6a61cbb89a7b1eaf64..a4e2c8da58697e6da6366c2284409d86af06e40b 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -74,14 +74,6 @@ extern "C" {
                             (1 << PROGRAM_CONSTANT) |     \
                             (1 << PROGRAM_UNIFORM))
  
-/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS         4096
-
  /**
   * Maximum number of arrays
   */
@@ -446,7 +438,6 @@ public:
                int mul_operand);
     bool try_emit_mad_for_and_not(ir_expression *ir,
                int mul_operand);
-   bool try_emit_sat(ir_expression *ir);
  
     void emit_swz(ir_expression *ir);
  
@@ -1081,8 +1072,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
  
     if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
        unsigned int i;
-      const ir_state_slot *const slots = ir->state_slots;
-      assert(ir->state_slots != NULL);
+      const ir_state_slot *const slots = ir->get_state_slots();
+      assert(slots != NULL);
  
        /* Check if this statevar's setup in the STATE file exactly
         * matches how we'll want to reference it as a
@@ -1090,7 +1081,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
         * temporary storage and hope that it'll get copy-propagated
         * out.
         */
-      for (i = 0; i < ir->num_state_slots; i++) {
+      for (i = 0; i < ir->get_num_state_slots(); i++) {
           if (slots[i].swizzle != SWIZZLE_XYZW) {
              break;
           }
@@ -1098,7 +1089,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
  
        variable_storage *storage;
        st_dst_reg dst;
-      if (i == ir->num_state_slots) {
+      if (i == ir->get_num_state_slots()) {
           /* We'll set the index later. */
           storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
           this->variables.push_tail(storage);
@@ -1109,7 +1100,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
            * of the type.  However, this had better match the number of state
            * elements that we're going to copy into the new temporary.
            */
-         assert((int) ir->num_state_slots == type_size(ir->type));
+         assert((int) ir->get_num_state_slots() == type_size(ir->type));
  
           dst = st_dst_reg(get_temp(ir->type));
  
@@ -1119,7 +1110,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
        }
  
  
-      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
           int index = _mesa_add_state_reference(this->prog->Parameters,
                                                (gl_state_index *)slots[i].tokens);
  
@@ -1144,7 +1135,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
        }
  
        if (storage->file == PROGRAM_TEMPORARY &&
-          dst.index != storage->index + (int) ir->num_state_slots) {
+          dst.index != storage->index + (int) ir->get_num_state_slots()) {
           fail_link(this->shader_program,
                    "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
                    ir->name, dst.index - storage->index,
@@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
     return true;
  }
  
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
-   /* Emit saturates in the vertex shader only if SM 3.0 is supported.
-    */
-   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
-       !st_context(this->ctx)->has_shader_model3) {
-      return false;
-   }
-
-   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
-   if (!sat_src)
-      return false;
-
-   sat_src->accept(this);
-   st_src_reg src = this->result;
-
-   /* If we generated an expression instruction into a temporary in
-    * processing the saturate's operand, apply the saturate to that
-    * instruction.  Otherwise, generate a MOV to do the saturate.
-    *
-    * Note that we have to be careful to only do this optimization if
-    * the instruction in question was what generated src->result.  For
-    * example, ir_dereference_array might generate a MUL instruction
-    * to create the reladdr, and return us a src reg using that
-    * reladdr.  That MUL result is not the value we're trying to
-    * saturate.
-    */
-   ir_expression *sat_src_expr = sat_src->as_expression();
-   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
-                       sat_src_expr->operation == ir_binop_add ||
-                       sat_src_expr->operation == ir_binop_dot)) {
-      glsl_to_tgsi_instruction *new_inst;
-      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      new_inst->saturate = true;
-   } else {
-      this->result = get_temp(ir->type);
-      st_dst_reg result_dst = st_dst_reg(this->result);
-      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-      glsl_to_tgsi_instruction *inst;
-      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
-      inst->saturate = true;
-   }
-
-   return true;
-}
-
  void
  glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
                                     st_src_reg *reg, int *num_reladdr)
@@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
          return;
     }
  
-   if (try_emit_sat(ir))
-      return;
-
     if (ir->operation == ir_quadop_vector)
        assert(!"ir_quadop_vector should have been lowered");
  
@@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
     case ir_unop_cos_reduced:
        emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
        break;
+   case ir_unop_saturate: {
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      inst->saturate = true;
+      break;
+   }
  
     case ir_unop_dFdx:
     case ir_unop_dFdx_coarse:
@@ -2670,10 +2617,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
     case GLSL_TYPE_BOOL:
        gl_type = native_integers ? GL_BOOL : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (native_integers)
-            values[i].u = ir->value.b[i] ? ~0 : 0;
-         else
-            values[i].f = ir->value.b[i];
+         values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
        }
        break;
     default:
@@ -3144,8 +3088,18 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
  {
     if (ir->condition) {
        ir->condition->accept(this);
-      this->result.negate = ~this->result.negate;
-      emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
+      st_src_reg condition = this->result;
+
+      /* Convert the bool condition to a float so we can negate. */
+      if (native_integers) {
+         st_src_reg temp = get_temp(ir->condition->type);
+         emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
+              condition, st_src_reg_for_float(1.0));
+         condition = temp;
+      }
+
+      condition.negate = ~condition.negate;
+      emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
     } else {
        /* unconditional kil */
        emit(ir, TGSI_OPCODE_KILL);
@@ -3215,6 +3169,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
     shader_program = NULL;
     shader = NULL;
     options = NULL;
+   have_sqrt = false;
  }
  
  glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
@@ -3301,14 +3256,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
  void
  glsl_to_tgsi_visitor::simplify_cmp(void)
  {
-   unsigned *tempWrites;
+   int tempWritesSize = 0;
+   unsigned *tempWrites = NULL;
     unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
  
-   tempWrites = new unsigned[MAX_TEMPS];
-   if (!tempWrites) {
-      return;
-   }
-   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
     memset(outputWrites, 0, sizeof(outputWrites));
  
     foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
@@ -3330,7 +3281,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
           prevWriteMask = outputWrites[inst->dst.index];
           outputWrites[inst->dst.index] |= inst->dst.writemask;
        } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         assert(inst->dst.index < MAX_TEMPS);
+         if (inst->dst.index >= tempWritesSize) {
+            const int inc = 4096;
+
+            tempWrites = (unsigned*)
+                         realloc(tempWrites,
+                                 (tempWritesSize + inc) * sizeof(unsigned));
+            if (!tempWrites)
+               return;
+
+            memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
+            tempWritesSize += inc;
+         }
+
           prevWriteMask = tempWrites[inst->dst.index];
           tempWrites[inst->dst.index] |= inst->dst.writemask;
        } else
@@ -3349,7 +3312,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
        }
     }
  
-   delete [] tempWrites;
+   free(tempWrites);
  }
  
  /* Replaces all references to a temporary register index with another index. */
@@ -4158,7 +4121,9 @@ struct label {
  struct st_translate {
     struct ureg_program *ureg;
  
-   struct ureg_dst temps[MAX_TEMPS];
+   unsigned temps_size;
+   struct ureg_dst *temps;
+
     struct ureg_dst arrays[MAX_ARRAYS];
     struct ureg_src *constants;
     struct ureg_src *immediates;
@@ -4200,6 +4165,8 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
      */
     TGSI_SEMANTIC_VERTEXID,
     TGSI_SEMANTIC_INSTANCEID,
+   0,
+   0,
  
     /* Geometry shader
      */
@@ -4299,7 +4266,19 @@ dst_register(struct st_translate *t,
        return ureg_dst_undef();
  
     case PROGRAM_TEMPORARY:
-      assert(index < Elements(t->temps));
+      /* Allocate space for temporaries on demand. */
+      if (index >= t->temps_size) {
+         const int inc = 4096;
+
+         t->temps = (struct ureg_dst*)
+                    realloc(t->temps,
+                            (t->temps_size + inc) * sizeof(struct ureg_dst));
+         if (!t->temps)
+            return ureg_dst_undef();
+
+         memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
+         t->temps_size += inc;
+      }
  
        if (ureg_dst_is_undef(t->temps[index]))
           t->temps[index] = ureg_DECL_local_temporary(t->ureg);
@@ -4802,15 +4781,19 @@ emit_wpos(struct st_context *st,
   * saturating the value to [0,1] does the job.
   */
  static void
-emit_face_var(struct st_translate *t)
+emit_face_var(struct gl_context *ctx, struct st_translate *t)
  {
     struct ureg_program *ureg = t->ureg;
     struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
     struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
  
-   /* MOV_SAT face_temp, input[face] */
-   face_temp = ureg_saturate(face_temp);
-   ureg_MOV(ureg, face_temp, face_input);
+   if (ctx->Const.NativeIntegers) {
+      ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
+   }
+   else {
+      /* MOV_SAT face_temp, input[face] */
+      ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
+   }
  
     /* Use face_temp as face input from here on: */
     t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
@@ -4930,7 +4913,7 @@ st_translate_program(
        }
  
        if (proginfo->InputsRead & VARYING_BIT_FACE)
-         emit_face_var(t);
+         emit_face_var(ctx, t);
  
        /*
         * Declare output attributes.
@@ -5158,6 +5141,7 @@ st_translate_program(
  
  out:
     if (t) {
+      free(t->temps);
        free(t->insn);
        free(t->labels);
        free(t->constants);
@@ -5230,6 +5214,7 @@ get_mesa_program(struct gl_context *ctx,
     v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
                                              PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
  
+   _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
     _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                                prog->Parameters);
  
@@ -5335,10 +5320,6 @@ get_mesa_program(struct gl_context *ctx,
     case GL_GEOMETRY_SHADER:
        stgp = (struct st_geometry_program *)prog;
        stgp->glsl_to_tgsi = v;
-      stgp->Base.InputType = shader_program->Geom.InputType;
-      stgp->Base.OutputType = shader_program->Geom.OutputType;
-      stgp->Base.VerticesOut = shader_program->Geom.VerticesOut;
-      stgp->Base.Invocations = shader_program->Geom.Invocations;
        break;
     default:
        assert(!"should not be reached");
@@ -5350,34 +5331,6 @@ get_mesa_program(struct gl_context *ctx,
  
  extern "C" {
  
-struct gl_shader *
-st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
-{
-   struct gl_shader *shader;
-   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
-          type == GL_GEOMETRY_SHADER_ARB);
-   shader = rzalloc(NULL, struct gl_shader);
-   if (shader) {
-      shader->Type = type;
-      shader->Stage = _mesa_shader_enum_to_shader_stage(type);
-      shader->Name = name;
-      _mesa_init_shader(ctx, shader);
-   }
-   return shader;
-}
-
-struct gl_shader_program *
-st_new_shader_program(struct gl_context *ctx, GLuint name)
-{
-   struct gl_shader_program *shProg;
-   shProg = rzalloc(NULL, struct gl_shader_program);
-   if (shProg) {
-      shProg->Name = name;
-      _mesa_init_shader_program(ctx, shProg);
-   }
-   return shProg;
-}
-
  /**
   * Link a shader.
   * Called via ctx->Driver.LinkShader()