t_dd_dmatmp: Make "count" actually be the count
[mesa.git] / src / mesa / drivers / dri / i915 / intel_render.c
1 /**************************************************************************
2 *
3 * Copyright 2003 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Render unclipped vertex buffers by emitting vertices directly to
30 * dma buffers. Use strip/fan hardware acceleration where possible.
31 *
32 */
33 #include "main/glheader.h"
34 #include "main/context.h"
35 #include "main/macros.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/enums.h"
39
40 #include "math/m_xform.h"
41
42 #include "tnl/t_context.h"
43 #include "tnl/t_vertex.h"
44 #include "tnl/t_pipeline.h"
45
46 #include "intel_screen.h"
47 #include "intel_context.h"
48 #include "intel_tris.h"
49 #include "intel_batchbuffer.h"
50 #include "intel_reg.h"
51
52 /*
53 * Render unclipped vertex buffers by emitting vertices directly to
54 * dma buffers. Use strip/fan hardware primitives where possible.
55 * Try to simulate missing primitives with indexed vertices.
56 */
/* Primitive-capability switches, defined ahead of the
 * tnl_dd/t_dd_dmatmp.h include further down in this file.
 * NOTE(review): presumably the template reads these to decide which
 * primitive types it may emit natively -- confirm against the template.
 */
#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to
                       * be adjusted for points on the INTEL/I845G
                       */
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0 /* has it, template can't use it yet */
#define HAVE_TRI_FANS 1
#define HAVE_POLYGONS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0

/* Indexed (element) rendering is switched off for this path. */
#define HAVE_ELTS 0
71
72 static uint32_t hw_prim[GL_POLYGON + 1] = {
73 0,
74 PRIM3D_LINELIST,
75 PRIM3D_LINESTRIP,
76 PRIM3D_LINESTRIP,
77 PRIM3D_TRILIST,
78 PRIM3D_TRISTRIP,
79 PRIM3D_TRIFAN,
80 0,
81 0,
82 PRIM3D_POLY
83 };
84
85 static const GLenum reduced_prim[GL_POLYGON + 1] = {
86 GL_POINTS,
87 GL_LINES,
88 GL_LINES,
89 GL_LINES,
90 GL_TRIANGLES,
91 GL_TRIANGLES,
92 GL_TRIANGLES,
93 GL_TRIANGLES,
94 GL_TRIANGLES,
95 GL_TRIANGLES
96 };
97
98 static const int scale_prim[GL_POLYGON + 1] = {
99 0, /* fallback case */
100 1,
101 2,
102 2,
103 1,
104 3,
105 3,
106 0, /* fallback case */
107 0, /* fallback case */
108 3
109 };
110
111
112 static void
113 intelDmaPrimitive(struct intel_context *intel, GLenum prim)
114 {
115 if (0)
116 fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim));
117 INTEL_FIREVERTICES(intel);
118 intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
119 intel_set_prim(intel, hw_prim[prim]);
120 }
121
/* Amount held back from the batch buffer in the no_vbo case: it is
 * subtracted from the batch size / free batch space before the remainder is
 * converted into a vertex count (see intel_get_vb_max() and
 * intel_get_current_max()).
 * NOTE(review): the name suggests the space is kept for state emission --
 * confirm.
 */
#define INTEL_NO_VBO_STATE_RESERVED 1500
123
124 static inline GLuint intel_get_vb_max(struct intel_context *intel)
125 {
126 GLuint ret;
127
128 if (intel->intelScreen->no_vbo) {
129 ret = intel->batch.bo->size - INTEL_NO_VBO_STATE_RESERVED;
130 } else
131 ret = INTEL_VB_SIZE;
132 ret /= (intel->vertex_size * 4);
133 return ret;
134 }
135
136 static inline GLuint intel_get_current_max(struct intel_context *intel)
137 {
138 GLuint ret;
139
140 if (intel->intelScreen->no_vbo) {
141 ret = intel_batchbuffer_space(intel);
142 ret = ret <= INTEL_NO_VBO_STATE_RESERVED ? 0 : ret - INTEL_NO_VBO_STATE_RESERVED;
143 } else
144 ret = (INTEL_VB_SIZE - intel->prim.current_offset);
145
146 return ret / (intel->vertex_size * 4);
147 }
148
/* Glue macros defined immediately before including tnl_dd/t_dd_dmatmp.h.
 * NOTE(review): presumably the template expands these inside the functions
 * it generates -- confirm against the template header.
 */

/* Declares the local `intel` pointer, derived from `ctx`. */
#define LOCAL_VARS struct intel_context *intel = intel_context(ctx)

/* Starts a primitive of the given GL mode via intelDmaPrimitive(). */
#define INIT( prim ) \
do { \
   intelDmaPrimitive( intel, prim ); \
} while (0)

/* Flushes queued vertices. */
#define FLUSH() INTEL_FIREVERTICES(intel)

/* Vertex capacity of a fresh buffer and of the buffer currently in use. */
#define GET_SUBSEQUENT_VB_MAX_VERTS() intel_get_vb_max(intel)
#define GET_CURRENT_VB_MAX_VERTS() intel_get_current_max(intel)

/* Reserves space for `nr` vertices via intel_get_prim_space(). */
#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr)

/* Emits vertices [j, j + nr) into `buf`. */
#define EMIT_VERTS( ctx, j, nr, buf ) \
   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )

/* Name prefix for template-generated symbols.  intel_validate_render and
 * intel_render_tab_verts, used later in this file, are not defined here and
 * presumably come from the template through this macro.
 */
#define TAG(x) intel_##x
#include "tnl_dd/t_dd_dmatmp.h"
167
168
169 /**********************************************************************/
170 /* Render pipeline stage */
171 /**********************************************************************/
172
173 /* Heuristic to choose between the two render paths:
174 */
175 static bool
176 choose_render(struct intel_context *intel, struct vertex_buffer *VB)
177 {
178 int vertsz = intel->vertex_size;
179 int cost_render = 0;
180 int cost_fallback = 0;
181 int nr_prims = 0;
182 int nr_rprims = 0;
183 int nr_rverts = 0;
184 int rprim = intel->reduced_primitive;
185 int i = 0;
186
187 for (i = 0; i < VB->PrimitiveCount; i++) {
188 GLuint prim = VB->Primitive[i].mode;
189 GLuint length = VB->Primitive[i].count;
190
191 if (!length)
192 continue;
193
194 nr_prims++;
195 nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK];
196
197 if (reduced_prim[prim & PRIM_MODE_MASK] != rprim) {
198 nr_rprims++;
199 rprim = reduced_prim[prim & PRIM_MODE_MASK];
200 }
201 }
202
203 /* One point for each generated primitive:
204 */
205 cost_render = nr_prims;
206 cost_fallback = nr_rprims;
207
208 /* One point for every 1024 dwords (4k) of dma:
209 */
210 cost_render += (vertsz * i) / 1024;
211 cost_fallback += (vertsz * nr_rverts) / 1024;
212
213 if (0)
214 fprintf(stderr, "cost render: %d fallback: %d\n",
215 cost_render, cost_fallback);
216
217 if (cost_render > cost_fallback)
218 return false;
219
220 return true;
221 }
222
223
224 static GLboolean
225 intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
226 {
227 struct intel_context *intel = intel_context(ctx);
228 TNLcontext *tnl = TNL_CONTEXT(ctx);
229 struct vertex_buffer *VB = &tnl->vb;
230 GLuint i;
231
232 intel->vtbl.render_prevalidate( intel );
233
234 /* Don't handle clipping or indexed vertices.
235 */
236 if (intel->RenderIndex != 0 ||
237 !intel_validate_render(ctx, VB) || !choose_render(intel, VB)) {
238 return true;
239 }
240
241 tnl->clipspace.new_inputs |= VERT_BIT_POS;
242
243 tnl->Driver.Render.Start(ctx);
244
245 for (i = 0; i < VB->PrimitiveCount; i++) {
246 GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
247 GLuint start = VB->Primitive[i].start;
248 GLuint length = VB->Primitive[i].count;
249
250 if (!length)
251 continue;
252
253 intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
254 length, prim);
255 }
256
257 tnl->Driver.Render.Finish(ctx);
258
259 INTEL_FIREVERTICES(intel);
260
261 return false; /* finished the pipe */
262 }
263
/* Pipeline stage descriptor for the direct-to-DMA render path.  Only the
 * name and the final (run) member are populated; the remaining members are
 * NULL.
 */
static const struct tnl_pipeline_stage _intel_render_stage = {
   "intel render",
   NULL,
   NULL,
   NULL,
   NULL,
   intel_run_render /* run */
};
272
/* Ordered list of TNL pipeline stages for this driver, ending in a 0 entry.
 * _intel_render_stage sits directly ahead of _tnl_render_stage:
 * intel_run_render() returns true when it declines a buffer and false once
 * it has rendered it.
 * NOTE(review): presumably a true return lets the pipeline continue to
 * _tnl_render_stage -- confirm against the TNL pipeline runner.
 */
const struct tnl_pipeline_stage *intel_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_fog_coordinate_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
#if 1
   &_intel_render_stage,        /* ADD: unclipped rastersetup-to-dma */
#endif
   &_tnl_render_stage,
   0,
};