/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@vmware.com>
 */
#include "main/glheader.h"
#include "main/context.h"
#include "main/imports.h"
#include "main/mtypes.h"

#include "t_context.h"
#include "t_pipeline.h"
#include "t_vp_build.h"
void _tnl_install_pipeline( struct gl_context *ctx,
                            const struct tnl_pipeline_stage **stages )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   tnl->pipeline.new_state = ~0;

   /* Create a writeable copy of each stage.
    */
   for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      memcpy(s, stages[i], sizeof(*s));
      if (s->create)
         s->create(ctx, s);
   }

   tnl->pipeline.nr_stages = i;
}
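/* Usage sketch: drivers call this at context setup with one of the
 * NULL-terminated stage lists defined at the end of this file, e.g.
 *
 *    _tnl_install_pipeline(ctx, _tnl_default_pipeline);
 *
 * or with their own customized stage array.
 */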
void _tnl_destroy_pipeline( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      if (s->destroy)
         s->destroy(s);
   }

   tnl->pipeline.nr_stages = 0;
}
static GLuint check_input_changes( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0; i <= _TNL_LAST_MAT; i++) {
      if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
          tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
         tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
         tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
         tnl->pipeline.input_changes |= 1 << i;
      }
   }

   return tnl->pipeline.input_changes;
}
static GLuint check_output_changes( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   GLuint i;

   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
          tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
         tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
         tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
         tnl->pipeline.output_changes |= 1 << i;
      }
   }

   if (tnl->pipeline.output_changes)
      tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );

   return tnl->pipeline.output_changes;
}
/**
 * START/END_FAST_MATH macros:
 *
 * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
 *                  original mode to a temporary).
 * END_FAST_MATH: Restore x86 FPU to original mode.
 */
#if defined(__GNUC__) && defined(__i386__)
/*
 * Set the x86 FPU control word to guarantee only 32 bits of precision
 * are stored in registers.  Allowing the FPU to store more introduces
 * differences between situations where numbers are pulled out of memory
 * vs. situations where the compiler is able to optimize register usage.
 *
 * In the worst case, we force the compiler to use a memory access to
 * truncate the float, by specifying the 'volatile' keyword.
 */
/* Hardware default: All exceptions masked, extended double precision,
 * round to nearest (IEEE compliant):
 */
#define DEFAULT_X86_FPU  0x037f
/* All exceptions masked, single precision, round to nearest:
 */
#define FAST_X86_FPU     0x003f
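/* For reference, the x87 control-word fields behind these constants
 * (standard Intel encoding, not Mesa-specific): bits 0-5 mask the six
 * FP exceptions, bits 8-9 select precision (00 = single, 11 = extended),
 * and bits 10-11 select rounding (00 = round to nearest).  So:
 *
 *    DEFAULT_X86_FPU  0x037f = all masked, extended precision, nearest
 *    FAST_X86_FPU     0x003f = all masked, single precision,   nearest
 */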
/* The fldcw instruction will cause any pending FP exceptions to be
 * raised prior to entering the block, and we clear any pending
 * exceptions before exiting the block.  Hence, asm code has free
 * rein over the FPU while in the fast math block.
 */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                          \
do {                                                \
   static GLuint mask = DEFAULT_X86_FPU;            \
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );          \
   __asm__ ( "fldcw %0" : : "m" (mask) );           \
} while (0)
#else
#define START_FAST_MATH(x)                          \
do {                                                \
   static GLuint mask = FAST_X86_FPU;               \
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );          \
   __asm__ ( "fldcw %0" : : "m" (mask) );           \
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
 * occurred in the FAST_MATH block.
 */
#define END_FAST_MATH(x)                            \
do {                                                \
   __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \
} while (0)
#elif defined(_MSC_VER) && defined(_M_IX86)
#define DEFAULT_X86_FPU  0x037f /* See GCC comments above */
#define FAST_X86_FPU     0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
   static GLuint mask = DEFAULT_X86_FPU;\
   __asm fnstcw word ptr [x]\
   __asm fldcw word ptr [mask]\
} while (0)
#else
#define START_FAST_MATH(x) do {\
   static GLuint mask = FAST_X86_FPU;\
   __asm fnstcw word ptr [x]\
   __asm fldcw word ptr [mask]\
} while (0)
#endif
#define END_FAST_MATH(x) do {\
   __asm fnclex\
   __asm fldcw word ptr [x]\
} while (0)
#else
#define START_FAST_MATH(x)  x = 0
#define END_FAST_MATH(x)  (void)(x)
#endif
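/* A minimal usage sketch of the macros above (the real caller is
 * _tnl_run_pipeline() below):
 *
 *    unsigned short saved_cw;
 *    START_FAST_MATH(saved_cw);   -- save control word, go single precision
 *    ... float-heavy vertex work ...
 *    END_FAST_MATH(saved_cw);     -- clear exceptions, restore control word
 */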
void _tnl_run_pipeline( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   unsigned short __tmp;
   GLuint i;

   if (!tnl->vb.VertexPtr)
      return;

   /* Check for changed input sizes or change in stride to/from zero
    * (ie const or non-const).
    */
   if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
      if (ctx->VertexProgram._MaintainTnlProgram)
         _tnl_UpdateFixedFunctionProgram( ctx );

      for (i = 0; i < tnl->pipeline.nr_stages; i++) {
         struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
         if (s->validate)
            s->validate( ctx, s );
      }

      tnl->pipeline.new_state = 0;
      tnl->pipeline.input_changes = 0;

      /* Pipeline can only change its output in response to either a
       * statechange or an input size/stride change.  No other changes
       * are allowed.
       */
      if (check_output_changes( ctx ))
         _tnl_notify_pipeline_output_change( ctx );
   }

   /* Don't adjust the FPU precision mode in case multiple threads are to be
    * used.  That would require the additional threads to change the FPU mode
    * as well, which is quite a mess as it would have to be done in all
    * parallelized sections; otherwise the master thread and the other
    * threads would run in different modes, producing inconsistent results.
    * Note that x64 implementations don't define/use START_FAST_MATH, so
    * this "hack" is only used in i386 mode.
    */
   START_FAST_MATH(__tmp);

   for (i = 0; i < tnl->pipeline.nr_stages; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      /* A stage that returns GL_FALSE finishes the pipeline. */
      if (!s->run( ctx, s ))
         break;
   }

   END_FAST_MATH(__tmp);
}
/* The default pipeline.  This is useful for software rasterizers, and
 * simple hardware rasterizers.  For customization, I don't recommend
 * tampering with the internals of these stages in the way that
 * drivers did in Mesa 3.4.  These stages are basically black boxes,
 * and should be left intact.
 *
 * To customize the pipeline, consider:
 *
 * - removing redundant stages (making sure that the software rasterizer
 *   can cope with this on fallback paths).  An example is fog
 *   coordinate generation, which is not required in the FX driver
 *   (see the sketch below).
 *
 * - replacing general-purpose machine-independent stages with
 *   general-purpose machine-specific stages.  There is no example of
 *   this to date, though it must be borne in mind that all subsequent
 *   stages that reference the output of the new stage must cope with
 *   any machine-specific data introduced.  This may not be easy
 *   unless there are no such stages (ie the new stage is the last in
 *   the pipe).
 *
 * - inserting optimized (but specialized) stages ahead of the
 *   general-purpose fallback implementation.  For example, the old
 *   fastpath mechanism, which only works when the VB->Elts input is
 *   available, can be duplicated by placing the fastpath stage at the
 *   head of this pipeline.  Such specialized stages are currently
 *   constrained to have no outputs (ie. they must either finish the
 *   pipeline by returning GL_FALSE from run(), or do nothing).
 *
 * Some work can be done to lift some of the restrictions in the final
 * case, if it becomes necessary to do so.
 */
const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   &_tnl_fog_coordinate_stage,
   &_tnl_render_stage,
   NULL
};
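/* A hedged sketch of the first customization suggested above: a
 * hypothetical driver that computes fog in hardware could install a copy
 * of the default pipeline with the fog-coordinate stage removed.  The
 * array name is illustrative only and not part of Mesa.
 */
#if 0
static const struct tnl_pipeline_stage *hypothetical_hw_fog_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   /* &_tnl_fog_coordinate_stage omitted: fog computed in hardware */
   &_tnl_render_stage,
   NULL
};
#endif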
const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
   &_tnl_vertex_program_stage,
   &_tnl_render_stage,
   NULL
};