* Added options for disabling the fast path (render stage) and vertex DMA
[mesa.git] / src / mesa / drivers / dri / savage / savagerender.c
1 /*
2 * Copyright 2005 Felix Kuehling
3 * All rights reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sub license,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Render unclipped vertex buffers by emitting vertices directly to
27 * dma buffers. Use strip/fan hardware primitives where possible.
28 * Simulate missing primitives with indexed vertices.
29 */
30 #include "glheader.h"
31 #include "context.h"
32 #include "macros.h"
33 #include "imports.h"
34 #include "mtypes.h"
35
36 #include "tnl/t_context.h"
37
38 #include "savagecontext.h"
39 #include "savagetris.h"
40 #include "savagestate.h"
41 #include "savageioctl.h"
42
43 /*
44 * Standard render tab for Savage4 and smooth shading on Savage3D
45 */
46 #define HAVE_POINTS 0
47 #define HAVE_LINES 0
48 #define HAVE_LINE_STRIPS 0
49 #define HAVE_TRIANGLES 1
50 #define HAVE_TRI_STRIPS 1
51 #define HAVE_TRI_STRIP_1 0
52 #define HAVE_TRI_FANS 1
53 #define HAVE_POLYGONS 0
54 #define HAVE_QUADS 0
55 #define HAVE_QUAD_STRIPS 0
56
57 #define HAVE_ELTS 1
58
59 #define LOCAL_VARS savageContextPtr imesa = SAVAGE_CONTEXT(ctx)
60 #define INIT( prim ) do { \
61 if (0) fprintf(stderr, "%s\n", __FUNCTION__); \
62 savageFlushVertices(imesa); \
63 switch (prim) { \
64 case GL_TRIANGLES: imesa->HwPrim = SAVAGE_PRIM_TRILIST; break; \
65 case GL_TRIANGLE_STRIP: imesa->HwPrim = SAVAGE_PRIM_TRISTRIP; break; \
66 case GL_TRIANGLE_FAN: imesa->HwPrim = SAVAGE_PRIM_TRIFAN; break; \
67 } \
68 } while (0)
69 #define FLUSH() savageFlushElts(imesa), savageFlushVertices(imesa)
70
71 #define GET_CURRENT_VB_MAX_VERTS() \
72 ((imesa->bufferSize/4 - imesa->vtxBuf->used) / imesa->HwVertexSize)
73 #define GET_SUBSEQUENT_VB_MAX_VERTS() \
74 (imesa->bufferSize/4 / imesa->HwVertexSize)
75
76 #define ALLOC_VERTS( nr ) \
77 savageAllocVtxBuf( imesa, (nr) * imesa->HwVertexSize )
78 #define EMIT_VERTS( ctx, j, nr, buf ) \
79 _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
80
81 #define ELTS_VARS( buf ) GLushort *dest = buf, firstElt = imesa->firstElt
82 #define ELT_INIT( prim ) INIT(prim)
83
84 /* (size - used - 1 qword for drawing command) * 4 elts per qword */
85 #define GET_CURRENT_VB_MAX_ELTS() \
86 ((imesa->cmdBuf.size - (imesa->cmdBuf.write - imesa->cmdBuf.base) - 1)*4)
87 /* (size - space for initial state - 1 qword for drawing command) * 4 elts
88 * imesa is not defined in validate_render :( */
89 #define GET_SUBSEQUENT_VB_MAX_ELTS() \
90 ((SAVAGE_CONTEXT(ctx)->cmdBuf.size - \
91 (SAVAGE_CONTEXT(ctx)->cmdBuf.start - \
92 SAVAGE_CONTEXT(ctx)->cmdBuf.base) - 1)*4)
93
94 #define ALLOC_ELTS(nr) savageAllocElts(imesa, nr)
95 #define EMIT_ELT(offset, x) do { \
96 (dest)[offset] = (GLushort) ((x)+firstElt); \
97 } while (0)
98 #define EMIT_TWO_ELTS(offset, x, y) do { \
99 *(GLuint *)(dest + offset) = (((y)+firstElt) << 16) | \
100 ((x)+firstElt); \
101 } while (0)
102
103 #define INCR_ELTS( nr ) dest += nr
104 #define ELTPTR dest
105 #define RELEASE_ELT_VERTS() \
106 savageReleaseIndexedVerts(imesa)
107
108 #define EMIT_INDEXED_VERTS( ctx, start, count ) do { \
109 GLuint *buf = savageAllocIndexedVerts(imesa, count-start); \
110 EMIT_VERTS(ctx, start, count-start, buf); \
111 } while (0)
112
113 #define TAG(x) savage_##x
114 #include "tnl_dd/t_dd_dmatmp.h"
115
116 /*
117 * On Savage3D triangle fans and strips are broken with flat
118 * shading. With triangles it wants the color for flat shading in the
119 * first vertex! So we make another template instance which uses
120 * triangles only (with reordered vertices: SAVAGE_PRIM_TRILIST_201).
121 * The reordering is done by the DRM.
122 */
123 #undef HAVE_TRI_STRIPS
124 #undef HAVE_TRI_FANS
125 #define HAVE_TRI_STRIPS 0
126 #define HAVE_TRI_FANS 0
127
128 #undef INIT
129 #define INIT( prim ) do { \
130 if (0) fprintf(stderr, "%s\n", __FUNCTION__); \
131 savageFlushVertices(imesa); \
132 imesa->HwPrim = SAVAGE_PRIM_TRILIST_201; \
133 } while(0)
134
135 #undef TAG
136 #define TAG(x) savage_flat_##x##_s3d
137 #include "tnl_dd/t_dd_dmatmp.h"
138
139
140 /**********************************************************************/
141 /* Render pipeline stage */
142 /**********************************************************************/
143
144 static GLboolean savage_run_render( GLcontext *ctx,
145 struct tnl_pipeline_stage *stage )
146 {
147 savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
148 TNLcontext *tnl = TNL_CONTEXT(ctx);
149 struct vertex_buffer *VB = &tnl->vb;
150 tnl_render_func *tab, *tab_elts;
151 GLboolean valid;
152 GLuint i;
153
154 if (savageHaveIndexedVerts(imesa) && (!VB->Elts || stage->changed_inputs))
155 savageReleaseIndexedVerts(imesa);
156
157 if (imesa->savageScreen->chipset < S3_SAVAGE4 &&
158 (ctx->_TriangleCaps & DD_FLATSHADE)) {
159 tab = savage_flat_render_tab_verts_s3d;
160 tab_elts = savage_flat_render_tab_elts_s3d;
161 valid = savage_flat_validate_render_s3d( ctx, VB );
162 } else {
163 tab = savage_render_tab_verts;
164 tab_elts = savage_render_tab_elts;
165 valid = savage_validate_render( ctx, VB );
166 }
167
168 /* Don't handle clipping or vertex manipulations.
169 */
170 if (imesa->RenderIndex != 0 || !valid) {
171 return GL_TRUE;
172 }
173
174 tnl->Driver.Render.Start( ctx );
175 /* Check RenderIndex again. The ptexHack is detected late in RenderStart.
176 * Also check for ptex fallbacks detected late.
177 */
178 if (imesa->RenderIndex != 0 || imesa->Fallback != 0) {
179 return GL_TRUE;
180 }
181
182 /* setup for hardware culling */
183 imesa->raster_primitive = GL_TRIANGLES;
184 imesa->new_state |= SAVAGE_NEW_CULL;
185
186 /* update and emit state */
187 savageDDUpdateHwState(ctx);
188 savageEmitChangedState(imesa);
189
190 if (VB->Elts) {
191 tab = tab_elts;
192 if (!savageHaveIndexedVerts(imesa)) {
193 if (VB->Count > GET_SUBSEQUENT_VB_MAX_VERTS())
194 return GL_TRUE;
195 EMIT_INDEXED_VERTS(ctx, 0, VB->Count);
196 }
197 }
198
199 for (i = 0 ; i < VB->PrimitiveCount ; i++)
200 {
201 GLuint prim = VB->Primitive[i].mode;
202 GLuint start = VB->Primitive[i].start;
203 GLuint length = VB->Primitive[i].count;
204
205 if (length)
206 tab[prim & PRIM_MODE_MASK]( ctx, start, start+length, prim);
207 }
208
209 tnl->Driver.Render.Finish( ctx );
210
211 return GL_FALSE; /* finished the pipe */
212 }
213
214 static void savage_check_render( GLcontext *ctx,
215 struct tnl_pipeline_stage *stage )
216 {
217 stage->inputs = TNL_CONTEXT(ctx)->render_inputs;
218 stage->active = SAVAGE_CONTEXT(ctx)->enable_fastpath;
219 }
220
/**
 * Destructor of the render stage.  The stage keeps no private data, so
 * there is nothing to release.
 */
static void dtr( struct tnl_pipeline_stage *stage )
{
   (void) stage;   /* intentionally unused */
}
225
226 struct tnl_pipeline_stage _savage_render_stage =
227 {
228 "savage render",
229 (_DD_NEW_SEPARATE_SPECULAR |
230 _NEW_TEXTURE|
231 _NEW_FOG|
232 _NEW_RENDERMODE), /* re-check (new inputs) */
233 0, /* re-run (always runs) */
234 GL_TRUE, /* active */
235 0, 0, /* inputs (set in check_render), outputs */
236 0, 0, /* changed_inputs, private */
237 dtr, /* destructor */
238 savage_check_render, /* check - initially set to alloc data */
239 savage_run_render /* run */
240 };
241
242
243 /**********************************************************************/
244 /* Pipeline stage for texture coordinate normalization */
245 /**********************************************************************/
246 struct texnorm_stage_data {
247 GLvector4f texcoord[MAX_TEXTURE_UNITS];
248 };
249
250 #define TEXNORM_STAGE_DATA(stage) ((struct texnorm_stage_data *)stage->privatePtr)
251
252
253 static GLboolean run_texnorm_stage( GLcontext *ctx,
254 struct tnl_pipeline_stage *stage )
255 {
256 struct texnorm_stage_data *store = TEXNORM_STAGE_DATA(stage);
257 savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
258 TNLcontext *tnl = TNL_CONTEXT(ctx);
259 struct vertex_buffer *VB = &tnl->vb;
260 GLuint i;
261
262 if (imesa->Fallback)
263 return GL_TRUE;
264
265 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
266 if (!(stage->inputs & stage->changed_inputs & VERT_BIT_TEX(i)) ||
267 VB->TexCoordPtr[i]->size == 4)
268 /* Never try to normalize homogenous tex coords! */
269 continue;
270
271 GLuint reallyEnabled = ctx->Texture.Unit[i]._ReallyEnabled;
272 struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
273 GLboolean normalizeS = (texObj->WrapS == GL_REPEAT);
274 GLboolean normalizeT = (reallyEnabled & TEXTURE_2D_BIT) &&
275 (texObj->WrapT == GL_REPEAT);
276 GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
277 GLint instride = VB->TexCoordPtr[i]->stride;
278 GLfloat (*out)[4] = store->texcoord[i].data;
279 GLint j;
280
281 if (normalizeS && normalizeT) {
282 /* take first texcoords as rough estimate of mean value */
283 GLfloat correctionS = -floor(in[0]+0.5);
284 GLfloat correctionT = -floor(in[1]+0.5);
285 for (j = 0; j < VB->Count; ++j) {
286 out[j][0] = in[0] + correctionS;
287 out[j][1] = in[1] + correctionT;
288 in = (GLfloat *)((GLubyte *)in + instride);
289 }
290 } else if (normalizeS) {
291 /* take first texcoords as rough estimate of mean value */
292 GLfloat correctionS = -floor(in[0]+0.5);
293 if (reallyEnabled & TEXTURE_2D_BIT) {
294 for (j = 0; j < VB->Count; ++j) {
295 out[j][0] = in[0] + correctionS;
296 out[j][1] = in[1];
297 in = (GLfloat *)((GLubyte *)in + instride);
298 }
299 } else {
300 for (j = 0; j < VB->Count; ++j) {
301 out[j][0] = in[0] + correctionS;
302 in = (GLfloat *)((GLubyte *)in + instride);
303 }
304 }
305 } else if (normalizeT) {
306 /* take first texcoords as rough estimate of mean value */
307 GLfloat correctionT = -floor(in[1]+0.5);
308 for (j = 0; j < VB->Count; ++j) {
309 out[j][0] = in[0];
310 out[j][1] = in[1] + correctionT;
311 in = (GLfloat *)((GLubyte *)in + instride);
312 }
313 }
314
315 if (normalizeS || normalizeT)
316 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
317 }
318
319 return GL_TRUE;
320 }
321
322 /* Called the first time stage->run() is invoked.
323 */
324 static GLboolean alloc_texnorm_data( GLcontext *ctx,
325 struct tnl_pipeline_stage *stage )
326 {
327 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
328 struct texnorm_stage_data *store;
329 GLuint i;
330
331 stage->privatePtr = CALLOC(sizeof(*store));
332 store = TEXNORM_STAGE_DATA(stage);
333 if (!store)
334 return GL_FALSE;
335
336 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
337 _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
338
339 /* Now run the stage.
340 */
341 stage->run = run_texnorm_stage;
342 return stage->run( ctx, stage );
343 }
344
345 static void check_texnorm( GLcontext *ctx,
346 struct tnl_pipeline_stage *stage )
347 {
348 GLuint flags = 0;
349
350 if (((ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) &&
351 (ctx->Texture.Unit[0]._Current->WrapS == GL_REPEAT)) ||
352 ((ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_2D_BIT) &&
353 (ctx->Texture.Unit[0]._Current->WrapT == GL_REPEAT)))
354 flags |= VERT_BIT_TEX0;
355
356 if (((ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) &&
357 (ctx->Texture.Unit[1]._Current->WrapS == GL_REPEAT)) ||
358 ((ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_2D_BIT) &&
359 (ctx->Texture.Unit[1]._Current->WrapT == GL_REPEAT)))
360 flags |= VERT_BIT_TEX1;
361
362 stage->inputs = flags;
363 stage->outputs = flags;
364 stage->active = (flags != 0);
365 }
366
367 static void free_texnorm_data( struct tnl_pipeline_stage *stage )
368 {
369 struct texnorm_stage_data *store = TEXNORM_STAGE_DATA(stage);
370 GLuint i;
371
372 if (store) {
373 for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
374 if (store->texcoord[i].data)
375 _mesa_vector4f_free( &store->texcoord[i] );
376 FREE( store );
377 stage->privatePtr = 0;
378 }
379 }
380
381 struct tnl_pipeline_stage _savage_texnorm_stage =
382 {
383 "savage texture coordinate normalization stage", /* name */
384 _NEW_TEXTURE, /* check_state */
385 _NEW_TEXTURE, /* run_state */
386 GL_TRUE, /* active? */
387 0, /* inputs */
388 0, /* outputs */
389 0, /* changed_inputs */
390 NULL, /* private data */
391 free_texnorm_data, /* destructor */
392 check_texnorm, /* check */
393 alloc_texnorm_data, /* run -- initially set to init */
394 };