mesa: refactor: move glPixelStore function into new pixelstore.c file
[mesa.git] / src / mesa / state_tracker / st_program.c
index 116b59a067a1620acdd4673c119bf51b56a7f6a4..d450c306947a50da04a5a5c8e110ba87da1a491b 100644 (file)
 
 #include "main/imports.h"
 #include "main/mtypes.h"
+#include "shader/prog_print.h"
+#include "shader/programopt.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
-#include "pipe/tgsi/mesa/mesa_to_tgsi.h"
-#include "pipe/tgsi/exec/tgsi_core.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "tgsi/util/tgsi_dump.h"
 
 #include "st_context.h"
-#include "st_cache.h"
 #include "st_atom.h"
 #include "st_program.h"
+#include "st_mesa_to_tgsi.h"
+#include "cso_cache/cso_context.h"
 
 
 #define TGSI_DEBUG 0
 
 
+/**
+ * Return a freshly MALLOC'd copy of the first \p size bytes at \p src,
+ * or NULL when the allocation fails.
+ *
+ * XXX we should use the version of this from p_util.h but including
+ * that header causes symbol collisions.
+ */
+static INLINE void *
+mem_dup(const void *src, uint size)
+{
+   void *copy = MALLOC(size);
+
+   /* nothing to copy into if the allocation failed */
+   if (!copy)
+      return NULL;
+
+   memcpy(copy, src, size);
+   return copy;
+}
+
+
+
 /**
  * Translate a Mesa vertex shader into a TGSI shader.
  * \param outputMapping  to map vertex program output registers to TGSI
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-const struct cso_vertex_shader *
+void
 st_translate_vertex_program(struct st_context *st,
                             struct st_vertex_program *stvp,
-                            const GLuint outputMapping[],
-                            struct tgsi_token *tokensOut,
-                            GLuint maxTokens)
+                            const GLuint outputMapping[])
 {
+   struct pipe_context *pipe = st->pipe;
+   struct tgsi_token tokens[ST_MAX_SHADER_TOKENS];
    GLuint defaultOutputMapping[VERT_RESULT_MAX];
    struct pipe_shader_state vs;
-   const struct cso_vertex_shader *cso;
    GLuint attr, i;
+   GLuint num_generic = 0;
+   GLuint num_tokens;
+
+   ubyte vs_input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte vs_input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   uint vs_num_inputs = 0;
+
+   ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte vs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+   uint vs_num_outputs = 0;
 
    memset(&vs, 0, sizeof(vs));
 
+   if (stvp->Base.IsPositionInvariant)
+      _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+
    /*
     * Determine number of inputs, the mappings between VERT_ATTRIB_x
     * and TGSI generic input indexes, plus input attrib semantic info.
     */
    for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
       if (stvp->Base.Base.InputsRead & (1 << attr)) {
-         const GLuint slot = vs.num_inputs;
+         const GLuint slot = vs_num_inputs;
 
-         vs.num_inputs++;
+         vs_num_inputs++;
 
          stvp->input_to_index[attr] = slot;
          stvp->index_to_input[slot] = attr;
 
          switch (attr) {
          case VERT_ATTRIB_POS:
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            vs.input_semantic_index[slot] = 0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            vs_input_semantic_index[slot] = 0;
             break;
          case VERT_ATTRIB_WEIGHT:
             /* fall-through */
          case VERT_ATTRIB_NORMAL:
             /* just label as a generic */
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            vs.input_semantic_index[slot] = 0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            vs_input_semantic_index[slot] = 0;
             break;
          case VERT_ATTRIB_COLOR0:
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            vs.input_semantic_index[slot] = 0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            vs_input_semantic_index[slot] = 0;
             break;
          case VERT_ATTRIB_COLOR1:
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            vs.input_semantic_index[slot] = 1;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            vs_input_semantic_index[slot] = 1;
             break;
          case VERT_ATTRIB_FOG:
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            vs.input_semantic_index[slot] = 0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            vs_input_semantic_index[slot] = 0;
             break;
          case VERT_ATTRIB_TEX0:
          case VERT_ATTRIB_TEX1:
@@ -114,8 +144,8 @@ st_translate_vertex_program(struct st_context *st,
          case VERT_ATTRIB_TEX5:
          case VERT_ATTRIB_TEX6:
          case VERT_ATTRIB_TEX7:
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            vs.input_semantic_index[slot] = attr - VERT_ATTRIB_TEX0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            vs_input_semantic_index[slot] = num_generic++;
             break;
          case VERT_ATTRIB_GENERIC0:
          case VERT_ATTRIB_GENERIC1:
@@ -126,8 +156,8 @@ st_translate_vertex_program(struct st_context *st,
          case VERT_ATTRIB_GENERIC6:
          case VERT_ATTRIB_GENERIC7:
             assert(attr < VERT_ATTRIB_MAX);
-            vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            vs.input_semantic_index[slot] = attr - VERT_ATTRIB_GENERIC0;
+            vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            vs_input_semantic_index[slot] = num_generic++;
             break;
          default:
             assert(0);
@@ -137,10 +167,11 @@ st_translate_vertex_program(struct st_context *st,
 
    /* initialize output semantics to defaults */
    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
-      vs.output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
-      vs.output_semantic_index[i] = 0;
+      vs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
+      vs_output_semantic_index[i] = 0;
    }
 
+   num_generic = 0;
    /*
     * Determine number of outputs, the (default) output register
     * mapping and the semantic information for each output.
@@ -154,8 +185,8 @@ st_translate_vertex_program(struct st_context *st,
             assert(slot != ~0);
          }
          else {
-            slot = vs.num_outputs;
-            vs.num_outputs++;
+            slot = vs_num_outputs;
+            vs_num_outputs++;
             defaultOutputMapping[attr] = slot;
          }
 
@@ -165,32 +196,33 @@ st_translate_vertex_program(struct st_context *st,
 
          switch (attr) {
          case VERT_RESULT_HPOS:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            vs.output_semantic_index[slot] = 0;
+            assert(slot == 0);
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            vs_output_semantic_index[slot] = 0;
             break;
          case VERT_RESULT_COL0:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            vs.output_semantic_index[slot] = 0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            vs_output_semantic_index[slot] = 0;
             break;
          case VERT_RESULT_COL1:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            vs.output_semantic_index[slot] = 1;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            vs_output_semantic_index[slot] = 1;
             break;
          case VERT_RESULT_BFC0:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
-            vs.output_semantic_index[slot] = 0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+            vs_output_semantic_index[slot] = 0;
             break;
          case VERT_RESULT_BFC1:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
-            vs.output_semantic_index[slot] = 1;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+            vs_output_semantic_index[slot] = 1;
             break;
          case VERT_RESULT_FOGC:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            vs.output_semantic_index[slot] = 0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            vs_output_semantic_index[slot] = 0;
             break;
          case VERT_RESULT_PSIZ:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
-            vs.output_semantic_index[slot] = 0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
+            vs_output_semantic_index[slot] = 0;
             break;
          case VERT_RESULT_EDGE:
             assert(0);
@@ -203,57 +235,75 @@ st_translate_vertex_program(struct st_context *st,
          case VERT_RESULT_TEX5:
          case VERT_RESULT_TEX6:
          case VERT_RESULT_TEX7:
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            vs.output_semantic_index[slot] = attr - VERT_RESULT_TEX0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            vs_output_semantic_index[slot] = num_generic++;
             break;
          case VERT_RESULT_VAR0:
             /* fall-through */
          default:
             assert(attr - VERT_RESULT_VAR0 < MAX_VARYING);
-            vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            vs.output_semantic_index[slot] = attr - VERT_RESULT_VAR0;
+            vs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            vs_output_semantic_index[slot] = num_generic++;
          }
       }
    }
 
+   assert(vs_output_semantic_name[0] == TGSI_SEMANTIC_POSITION);
+
 
    if (outputMapping) {
-      /* find max output slot referenced to compute vs.num_outputs */
+      /* find max output slot referenced to compute vs_num_outputs */
       GLuint maxSlot = 0;
       for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
          if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot)
             maxSlot = outputMapping[attr];
       }
-      vs.num_outputs = maxSlot + 1;
+      vs_num_outputs = maxSlot + 1;
    }
    else {
       outputMapping = defaultOutputMapping;
    }
 
+   /* free old shader state, if any */
+   if (stvp->state.tokens) {
+      FREE((void *) stvp->state.tokens);
+      stvp->state.tokens = NULL;
+   }
+   if (stvp->driver_shader) {
+      cso_delete_vertex_shader(st->cso_context, stvp->driver_shader);
+      stvp->driver_shader = NULL;
+   }
+
    /* XXX: fix static allocation of tokens:
     */
-   tgsi_mesa_compile_vp_program( &stvp->Base,
-                                 /* inputs */
-                                 vs.num_inputs,
-                                 stvp->input_to_index,
-                                 vs.input_semantic_name,
-                                 vs.input_semantic_index,
-                                 /* outputs */
-                                 vs.num_outputs,
-                                 outputMapping,
-                                 vs.output_semantic_name,
-                                 vs.output_semantic_index,
-                                 /* tokenized result */
-                                 tokensOut, maxTokens);
-
-   vs.tokens = tokensOut;
-   cso = st_cached_vs_state(st, &vs);
-   stvp->vs = cso;
+   num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX,
+                                &stvp->Base.Base,
+                                /* inputs */
+                                vs_num_inputs,
+                                stvp->input_to_index,
+                                vs_input_semantic_name,
+                                vs_input_semantic_index,
+                                NULL,
+                                /* outputs */
+                                vs_num_outputs,
+                                outputMapping,
+                                vs_output_semantic_name,
+                                vs_output_semantic_index,
+                                /* tokenized result */
+                                tokens, ST_MAX_SHADER_TOKENS);
+
+   vs.tokens = (struct tgsi_token *)
+      mem_dup(tokens, num_tokens * sizeof(tokens[0]));
+
+   stvp->num_inputs = vs_num_inputs;
+   stvp->state = vs; /* struct copy */
+   stvp->driver_shader = pipe->create_vs_state(pipe, &vs);
+
+   if (0)
+      _mesa_print_program(&stvp->Base.Base);
 
    if (TGSI_DEBUG)
-      tgsi_dump( tokensOut, 0 );
-
-   return cso;
+      tgsi_dump( vs.tokens, 0 );
 }
 
 
@@ -265,60 +315,71 @@ st_translate_vertex_program(struct st_context *st,
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-const struct cso_fragment_shader *
+void
 st_translate_fragment_program(struct st_context *st,
                               struct st_fragment_program *stfp,
-                              const GLuint inputMapping[],
-                              struct tgsi_token *tokensOut,
-                              GLuint maxTokens)
+                              const GLuint inputMapping[])
 {
+   struct pipe_context *pipe = st->pipe;
+   struct tgsi_token tokens[ST_MAX_SHADER_TOKENS];
    GLuint outputMapping[FRAG_RESULT_MAX];
    GLuint defaultInputMapping[FRAG_ATTRIB_MAX];
    struct pipe_shader_state fs;
-   const struct cso_fragment_shader *cso;
    GLuint interpMode[16];  /* XXX size? */
    GLuint attr;
-   GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   GLuint vslot = 0;
+   GLuint num_generic = 0;
+   GLuint num_tokens;
 
-   /* Check if all fragment programs need the fragment position (in order
-    * to do perspective-corrected interpolation).
-    */
-   /* XXX temporary! */
-   if (st->pipe->get_param(st->pipe, PIPE_PARAM_FS_NEEDS_POS))
-      inputsRead |= FRAG_BIT_WPOS;
+   ubyte fs_input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte fs_input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   uint fs_num_inputs = 0;
+
+   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+   uint fs_num_outputs = 0;
 
    memset(&fs, 0, sizeof(fs));
 
+   /* which vertex output goes to the first fragment input: */
+   if (inputsRead & FRAG_BIT_WPOS)
+      vslot = 0;
+   else
+      vslot = 1;
+
    /*
     * Convert Mesa program inputs to TGSI input register semantics.
     */
    for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
       if (inputsRead & (1 << attr)) {
-         const GLuint slot = fs.num_inputs;
-
-         fs.num_inputs++;
+         const GLuint slot = fs_num_inputs;
 
          defaultInputMapping[attr] = slot;
 
+         stfp->input_map[slot] = vslot++;
+
+         fs_num_inputs++;
+
          switch (attr) {
          case FRAG_ATTRIB_WPOS:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            fs.input_semantic_index[slot] = 0;
-            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            fs_input_semantic_index[slot] = 0;
+            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
             break;
          case FRAG_ATTRIB_COL0:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            fs.input_semantic_index[slot] = 0;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            fs_input_semantic_index[slot] = 0;
             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
             break;
          case FRAG_ATTRIB_COL1:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            fs.input_semantic_index[slot] = 1;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            fs_input_semantic_index[slot] = 1;
             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
             break;
          case FRAG_ATTRIB_FOGC:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            fs.input_semantic_index[slot] = 0;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            fs_input_semantic_index[slot] = 0;
             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
             break;
          case FRAG_ATTRIB_TEX0:
@@ -329,38 +390,55 @@ st_translate_fragment_program(struct st_context *st,
          case FRAG_ATTRIB_TEX5:
          case FRAG_ATTRIB_TEX6:
          case FRAG_ATTRIB_TEX7:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_TEX0;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            fs_input_semantic_index[slot] = num_generic++;
             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
             break;
          case FRAG_ATTRIB_VAR0:
             /* fall-through */
          default:
-            fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_VAR0;
+            fs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            fs_input_semantic_index[slot] = num_generic++;
             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
          }
       }
    }
 
    /*
-    * Semantics for outputs
+    * Semantics and mapping for outputs
     */
-   for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-      if (stfp->Base.Base.OutputsWritten & (1 << attr)) {
-         switch (attr) {
-         case FRAG_RESULT_DEPR:
-            fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_POSITION;
-            outputMapping[attr] = fs.num_outputs;
-            break;
-         case FRAG_RESULT_COLR:
-            fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_COLOR;
-            outputMapping[attr] = fs.num_outputs;
-            break;
-         default:
-            assert(0);
+   {
+      uint numColors = 0;
+      GLbitfield outputsWritten = stfp->Base.Base.OutputsWritten;
+
+      /* if z is written, emit that first */
+      if (outputsWritten & (1 << FRAG_RESULT_DEPR)) {
+         fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
+         fs_output_semantic_index[fs_num_outputs] = 0;
+         outputMapping[FRAG_RESULT_DEPR] = fs_num_outputs;
+         fs_num_outputs++;
+         outputsWritten &= ~(1 << FRAG_RESULT_DEPR);
+      }
+
+      /* handle remaining outputs (color) */
+      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+         if (outputsWritten & (1 << attr)) {
+            switch (attr) {
+            case FRAG_RESULT_DEPR:
+               /* handled above */
+               assert(0);
+               break;
+            case FRAG_RESULT_COLR:
+               fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
+               fs_output_semantic_index[fs_num_outputs] = numColors;
+               outputMapping[attr] = fs_num_outputs;
+               numColors++;
+               break;
+            default:
+               assert(0);
+            }
+            fs_num_outputs++;
          }
-         fs.num_outputs++;
       }
    }
 
@@ -369,35 +447,32 @@ st_translate_fragment_program(struct st_context *st,
 
    /* XXX: fix static allocation of tokens:
     */
-   tgsi_mesa_compile_fp_program( &stfp->Base,
-                                 /* inputs */
-                                 fs.num_inputs,
-                                 inputMapping,
-                                 fs.input_semantic_name,
-                                 fs.input_semantic_index,
-                                 interpMode,
-                                 /* outputs */
-                                 outputMapping,
-                                 /* tokenized result */
-                                 tokensOut, maxTokens);
-
-
-   fs.tokens = tokensOut;
-
-   cso = st_cached_fs_state(st, &fs);
-   stfp->fs = cso;
+   num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT,
+                                &stfp->Base.Base,
+                                /* inputs */
+                                fs_num_inputs,
+                                inputMapping,
+                                fs_input_semantic_name,
+                                fs_input_semantic_index,
+                                interpMode,
+                                /* outputs */
+                                fs_num_outputs,
+                                outputMapping,
+                                fs_output_semantic_name,
+                                fs_output_semantic_index,
+                                /* tokenized result */
+                                tokens, ST_MAX_SHADER_TOKENS);
+
+   fs.tokens = (struct tgsi_token *)
+      mem_dup(tokens, num_tokens * sizeof(tokens[0]));
+
+   stfp->state = fs; /* struct copy */
+   stfp->driver_shader = pipe->create_fs_state(pipe, &fs);
+
+   if (0)
+      _mesa_print_program(&stfp->Base.Base);
 
    if (TGSI_DEBUG)
-      tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ );
-
-#if defined(__i386__) || defined(__386__)
-   if (stfp->sse2_program.csr == stfp->sse2_program.store)
-      tgsi_emit_sse2_fs( tokensOut, &stfp->sse2_program );
-
-   if (!cso->state.executable)
-      ((struct cso_fragment_shader*)cso)->state.executable = (void *) x86_get_func( &stfp->sse2_program );
-#endif
-
-   return cso;
+      tgsi_dump( fs.tokens, 0/*TGSI_DUMP_VERBOSE*/ );
 }