src/mesa/tnl/t_pipeline.c

   1 /*
   2  * Mesa 3-D graphics library
   3  *
   4  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Keith Whitwell <keithw@vmware.com>
  26  */
  27
  28 #include "main/glheader.h"
  29 #include "main/context.h"
  30 #include "main/imports.h"
  31 #include "main/mtypes.h"
  32
  33 #include "t_context.h"
  34 #include "t_pipeline.h"
  35 #include "t_vp_build.h"
  36 #include "t_vertex.h"
  37
  38 void _tnl_install_pipeline( struct gl_context *ctx,
  39                             const struct tnl_pipeline_stage **stages )
  40 {
  41    TNLcontext *tnl = TNL_CONTEXT(ctx);
  42    GLuint i;
  43
  44    tnl->pipeline.new_state = ~0;
  45
  46    /* Create a writeable copy of each stage.
  47     */
  48    for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
  49       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
  50       memcpy(s, stages[i], sizeof(*s));
  51       if (s->create)
  52          s->create(ctx, s);
  53    }
  54
  55    tnl->pipeline.nr_stages = i;
  56 }
  57
  58 void _tnl_destroy_pipeline( struct gl_context *ctx )
  59 {
  60    TNLcontext *tnl = TNL_CONTEXT(ctx);
  61    GLuint i;
  62
  63    for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
  64       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
  65       if (s->destroy)
  66          s->destroy(s);
  67    }
  68
  69    tnl->pipeline.nr_stages = 0;
  70 }
  71
  72
  73
  74 static GLuint check_input_changes( struct gl_context *ctx )
  75 {
  76    TNLcontext *tnl = TNL_CONTEXT(ctx);
  77    GLuint i;
  78
  79    for (i = 0; i <= _TNL_LAST_MAT; i++) {
  80       if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
  81           tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
  82          tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
  83          tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
  84          tnl->pipeline.input_changes |= 1<<i;
  85       }
  86    }
  87
  88    return tnl->pipeline.input_changes;
  89 }
  90
  91
  92 static GLuint check_output_changes( struct gl_context *ctx )
  93 {
  94 #if 0
  95    TNLcontext *tnl = TNL_CONTEXT(ctx);
  96
  97    for (i = 0; i < VARYING_SLOT_MAX; i++) {
  98       if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
  99           tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
 100          tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
 101          tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
 102          tnl->pipeline.output_changes |= 1<<i;
 103       }
 104    }
 105
 106    if (tnl->pipeline.output_changes)
 107       tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );
 108
 109    return tnl->pipeline.output_changes;
 110 #else
 111    return ~0;
 112 #endif
 113 }
 114
 115 /**
 116  * START/END_FAST_MATH macros:
 117  *
 118  * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
 119  *                  original mode to a temporary).
 120  * END_FAST_MATH: Restore x86 FPU to original mode.
 121  */
 122 #if defined(__GNUC__) && defined(__i386__)
 123 /*
 124  * Set the x86 FPU control word to guarentee only 32 bits of precision
 125  * are stored in registers.  Allowing the FPU to store more introduces
 126  * differences between situations where numbers are pulled out of memory
 127  * vs. situations where the compiler is able to optimize register usage.
 128  *
 129  * In the worst case, we force the compiler to use a memory access to
 130  * truncate the float, by specifying the 'volatile' keyword.
 131  */
 132 /* Hardware default: All exceptions masked, extended double precision,
 133  * round to nearest (IEEE compliant):
 134  */
 135 #define DEFAULT_X86_FPU         0x037f
 136 /* All exceptions masked, single precision, round to nearest:
 137  */
 138 #define FAST_X86_FPU            0x003f
 139 /* The fldcw instruction will cause any pending FP exceptions to be
 140  * raised prior to entering the block, and we clear any pending
 141  * exceptions before exiting the block.  Hence, asm code has free
 142  * reign over the FPU while in the fast math block.
 143  */
 144 #if defined(NO_FAST_MATH)
 145 #define START_FAST_MATH(x)                                              \
 146 do {                                                                    \
 147    static GLuint mask = DEFAULT_X86_FPU;                                \
 148    __asm__ ( "fnstcw %0" : "=m" (*&(x)) );                              \
 149    __asm__ ( "fldcw %0" : : "m" (mask) );                               \
 150 } while (0)
 151 #else
 152 #define START_FAST_MATH(x)                                              \
 153 do {                                                                    \
 154    static GLuint mask = FAST_X86_FPU;                                   \
 155    __asm__ ( "fnstcw %0" : "=m" (*&(x)) );                              \
 156    __asm__ ( "fldcw %0" : : "m" (mask) );                               \
 157 } while (0)
 158 #endif
 159 /* Restore original FPU mode, and clear any exceptions that may have
 160  * occurred in the FAST_MATH block.
 161  */
 162 #define END_FAST_MATH(x)                                                \
 163 do {                                                                    \
 164    __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) );                     \
 165 } while (0)
 166
 167 #elif defined(_MSC_VER) && defined(_M_IX86)
 168 #define DEFAULT_X86_FPU         0x037f /* See GCC comments above */
 169 #define FAST_X86_FPU            0x003f /* See GCC comments above */
 170 #if defined(NO_FAST_MATH)
 171 #define START_FAST_MATH(x) do {\
 172         static GLuint mask = DEFAULT_X86_FPU;\
 173         __asm fnstcw word ptr [x]\
 174         __asm fldcw word ptr [mask]\
 175 } while(0)
 176 #else
 177 #define START_FAST_MATH(x) do {\
 178         static GLuint mask = FAST_X86_FPU;\
 179         __asm fnstcw word ptr [x]\
 180         __asm fldcw word ptr [mask]\
 181 } while(0)
 182 #endif
 183 #define END_FAST_MATH(x) do {\
 184         __asm fnclex\
 185         __asm fldcw word ptr [x]\
 186 } while(0)
 187
 188 #else
 189 #define START_FAST_MATH(x)  x = 0
 190 #define END_FAST_MATH(x)  (void)(x)
 191 #endif
 192
 193
 194 void _tnl_run_pipeline( struct gl_context *ctx )
 195 {
 196    TNLcontext *tnl = TNL_CONTEXT(ctx);
 197    unsigned short __tmp;
 198    GLuint i;
 199
 200    if (!tnl->vb.Count)
 201       return;
 202
 203    /* Check for changed input sizes or change in stride to/from zero
 204     * (ie const or non-const).
 205     */
 206    if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
 207       if (ctx->VertexProgram._MaintainTnlProgram)
 208          _tnl_UpdateFixedFunctionProgram( ctx );
 209
 210       for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
 211          struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
 212          if (s->validate)
 213             s->validate( ctx, s );
 214       }
 215
 216       tnl->pipeline.new_state = 0;
 217       tnl->pipeline.input_changes = 0;
 218
 219       /* Pipeline can only change its output in response to either a
 220        * statechange or an input size/stride change.  No other changes
 221        * are allowed.
 222        */
 223       if (check_output_changes( ctx ))
 224          _tnl_notify_pipeline_output_change( ctx );
 225    }
 226
 227 #ifndef _OPENMP
 228    /* Don't adjust FPU precision mode in case multiple threads are to be used.
 229     * This would require that the additional threads also changed the FPU mode
 230     * which is quite a mess as this had to be done in all parallelized sections;
 231     * otherwise the master thread and all other threads are running in different
 232     * modes, producing inconsistent results.
 233     * Note that all x64 implementations don't define/use START_FAST_MATH, so
 234     * this is "hack" is only used in i386 mode
 235     */
 236    START_FAST_MATH(__tmp);
 237 #endif
 238
 239    for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
 240       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
 241       if (!s->run( ctx, s ))
 242          break;
 243    }
 244
 245 #ifndef _OPENMP
 246    END_FAST_MATH(__tmp);
 247 #endif
 248 }
 249
 250
 251
 252 /* The default pipeline.  This is useful for software rasterizers, and
 253  * simple hardware rasterizers.  For customization, I don't recommend
 254  * tampering with the internals of these stages in the way that
 255  * drivers did in Mesa 3.4.  These stages are basically black boxes,
 256  * and should be left intact.
 257  *
 258  * To customize the pipeline, consider:
 259  *
 260  * - removing redundant stages (making sure that the software rasterizer
 261  *   can cope with this on fallback paths).  An example is fog
 262  *   coordinate generation, which is not required in the FX driver.
 263  *
 264  * - replacing general-purpose machine-independent stages with
 265  *   general-purpose machine-specific stages.  There is no example of
 266  *   this to date, though it must be borne in mind that all subsequent
 267  *   stages that reference the output of the new stage must cope with
 268  *   any machine-specific data introduced.  This may not be easy
 269  *   unless there are no such stages (ie the new stage is the last in
 270  *   the pipe).
 271  *
 272  * - inserting optimized (but specialized) stages ahead of the
 273  *   general-purpose fallback implementation.  For example, the old
 274  *   fastpath mechanism, which only works when the VB->Elts input is
 275  *   available, can be duplicated by placing the fastpath stage at the
 276  *   head of this pipeline.  Such specialized stages are currently
 277  *   constrained to have no outputs (ie. they must either finish the *
 278  *   pipeline by returning GL_FALSE from run(), or do nothing).
 279  *
 280  * Some work can be done to lift some of the restrictions in the final
 281  * case, if it becomes necessary to do so.
 282  */
 283 const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
 284    &_tnl_vertex_transform_stage,
 285    &_tnl_normal_transform_stage,
 286    &_tnl_lighting_stage,
 287    &_tnl_texgen_stage,
 288    &_tnl_texture_transform_stage,
 289    &_tnl_point_attenuation_stage,
 290    &_tnl_vertex_program_stage,
 291    &_tnl_fog_coordinate_stage,
 292    &_tnl_render_stage,
 293    NULL
 294 };
 295
 296 const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
 297    &_tnl_vertex_program_stage,
 298    &_tnl_render_stage,
 299    NULL
 300 };