r300: bo and cs abstraction.
src/mesa/drivers/dri/r300/r300_render.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache-related registers are set up correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add the immediate implementation back? Perhaps this is useful if
 * there are no bugs...
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog.h"
extern int future_hw_tcl_on;

/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        return R300_VAP_VF_CNTL__PRIM_POINTS;
        break;
    case GL_LINES:
        return R300_VAP_VF_CNTL__PRIM_LINES;
        break;
    case GL_LINE_STRIP:
        return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
        break;
    case GL_LINE_LOOP:
        return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
        break;
    case GL_TRIANGLES:
        return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
        break;
    case GL_TRIANGLE_STRIP:
        return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
        break;
    case GL_TRIANGLE_FAN:
        return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
        break;
    case GL_QUADS:
        return R300_VAP_VF_CNTL__PRIM_QUADS;
        break;
    case GL_QUAD_STRIP:
        return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
        break;
    case GL_POLYGON:
        return R300_VAP_VF_CNTL__PRIM_POLYGON;
        break;
    default:
        assert(0);
        return -1;
        break;
    }
}

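/**
 * \brief Reduce the vertex count so that it only covers complete primitives.
 *
 * For example, GL_TRIANGLES is rounded down to a multiple of 3, and strips,
 * fans or polygons with too few vertices are dropped entirely.
 */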
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
    int verts_off = 0;

    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        verts_off = 0;
        break;
    case GL_LINES:
        verts_off = num_verts % 2;
        break;
    case GL_LINE_STRIP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_LINE_LOOP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_TRIANGLES:
        verts_off = num_verts % 3;
        break;
    case GL_TRIANGLE_STRIP:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_TRIANGLE_FAN:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_QUADS:
        verts_off = num_verts % 4;
        break;
    case GL_QUAD_STRIP:
        if (num_verts < 4)
            verts_off = num_verts;
        else
            verts_off = num_verts % 2;
        break;
    case GL_POLYGON:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    default:
        assert(0);
        return -1;
        break;
    }

    return num_verts - verts_off;
}

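/**
 * \brief Copy the index data into a newly allocated element buffer object.
 *
 * The indices are assumed to be 32 bits wide; the buffer is allocated in the
 * GTT domain and mapped only for the duration of the copy.
 */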
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
    r300ContextPtr rmesa = R300_CONTEXT(ctx);
    void *out;

    rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
                                             0, n_elts * 4, 4,
                                             RADEON_GEM_DOMAIN_GTT);
    rmesa->state.elt_dma_offset = 0;
    radeon_bo_map(rmesa->state.elt_dma_bo, 1);
    out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
    memcpy(out, elts, n_elts * 4);
    radeon_bo_unmap(rmesa->state.elt_dma_bo);
}

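/**
 * \brief Fire a draw command that reads from the element buffer.
 *
 * Emits a 3D_DRAW_INDX_2 packet describing the primitive type and vertex
 * count, followed by an INDX_BUFFER packet with a relocation to the element
 * buffer set up by r300EmitElts().
 */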
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
    BATCH_LOCALS(rmesa);

    if (vertex_count > 0) {
        BEGIN_BATCH(8);
        OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
        OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
                  (vertex_count << 16) |
                  type |
                  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

        OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
        OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
        OUT_BATCH_RELOC(0, rmesa->state.elt_dma_bo,
                        rmesa->state.elt_dma_offset, 0);
        OUT_BATCH(vertex_count);
        END_BATCH();
    }
}

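/**
 * \brief Emit the vertex array (array-of-structures) pointers.
 *
 * Builds a 3D_LOAD_VBPNTR packet: component counts and strides are packed
 * two arrays per dword, and each array gets a relocation to its buffer
 * object with the offset advanced to the requested start vertex.
 */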
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
    BATCH_LOCALS(rmesa);
    uint32_t voffset;
    int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
    int i;

    if (RADEON_DEBUG & DEBUG_VERTS)
        fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
                offset);

    BEGIN_BATCH(sz + 2);
    OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
    OUT_BATCH(nr);

    for (i = 0; i + 1 < nr; i += 2) {
        OUT_BATCH((rmesa->state.aos[i].components << 0) |
                  (rmesa->state.aos[i].stride << 8) |
                  (rmesa->state.aos[i + 1].components << 16) |
                  (rmesa->state.aos[i + 1].stride << 24));

        voffset = rmesa->state.aos[i + 0].offset +
            offset * 4 * rmesa->state.aos[i + 0].stride;
        OUT_BATCH_RELOC(0, rmesa->state.aos[i].bo, voffset, 0);
        voffset = rmesa->state.aos[i + 1].offset +
            offset * 4 * rmesa->state.aos[i + 1].stride;
        OUT_BATCH_RELOC(0, rmesa->state.aos[i + 1].bo, voffset, 0);
    }

    if (nr & 1) {
        OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
                  (rmesa->state.aos[nr - 1].stride << 8));
        voffset = rmesa->state.aos[nr - 1].offset +
            offset * 4 * rmesa->state.aos[nr - 1].stride;
        OUT_BATCH_RELOC(0, rmesa->state.aos[nr - 1].bo, voffset, 0);
    }
    END_BATCH();
}

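/**
 * \brief Fire a draw command that walks the vertex arrays directly.
 *
 * Emits a 3D_DRAW_VBUF_2 packet for a non-indexed vertex list.
 */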
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
    BATCH_LOCALS(rmesa);

    BEGIN_BATCH(3);
    OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
    OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST |
              (vertex_count << 16) | type);
    END_BATCH();
}

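/**
 * \brief Render a single primitive from the TNL vertex buffer.
 *
 * Uses the indexed (element buffer) path when vb->Elts is set, and the plain
 * vertex-list path otherwise.
 */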
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
                                   int start, int end, int prim)
{
    BATCH_LOCALS(rmesa);
    int type, num_verts;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    type = r300PrimitiveType(rmesa, prim);
    num_verts = r300NumVerts(rmesa, end - start, prim);

    if (type < 0 || num_verts <= 0)
        return;

    /* Make space for at least 64 dwords.
     * This is supposed to ensure that we can get all rendering
     * commands into a single command buffer.
     */
    r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);

    if (vb->Elts) {
        if (num_verts > 65535) {
            /* not implemented yet */
            WARN_ONCE("Too many elts\n");
            return;
        }
        /* Note: The following is incorrect, but it's the best I can do
         * without a major refactoring of how DMA memory is handled.
         * The problem: Ensuring that both vertex arrays *and* index
         * arrays are at the right position, and then ensuring that
         * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
         * at once.
         *
         * So why is the following incorrect? Well, it seems like
         * allocating the index array might actually evict the vertex
         * arrays. *sigh*
         */
        r300EmitElts(ctx, vb->Elts, num_verts);
        r300EmitAOS(rmesa, rmesa->state.aos_count, start);
        r300FireEB(rmesa, num_verts, type);
    } else {
        r300EmitAOS(rmesa, rmesa->state.aos_count, start);
        r300FireAOS(rmesa, num_verts, type);
    }
    COMMIT_BATCH();
}

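/**
 * \brief Emit state and render every primitive in the TNL vertex buffer.
 *
 * Returns GL_FALSE once the hardware has rendered the buffer, or GL_TRUE to
 * pass the batch on to software rasterization if the arrays cannot be
 * emitted.
 */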
static GLboolean r300RunRender(GLcontext * ctx,
                               struct tnl_pipeline_stage *stage)
{
    r300ContextPtr rmesa = R300_CONTEXT(ctx);
    int i;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    if (RADEON_DEBUG & DEBUG_PRIMS)
        fprintf(stderr, "%s\n", __FUNCTION__);

    r300UpdateShaders(rmesa);
    if (r300EmitArrays(ctx))
        return GL_TRUE;

    r300UpdateShaderStates(rmesa);

    r300EmitCacheFlush(rmesa);
    r300EmitState(rmesa);

    for (i = 0; i < vb->PrimitiveCount; i++) {
        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
        GLuint start = vb->Primitive[i].start;
        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
        r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
    }

    r300EmitCacheFlush(rmesa);

    r300ReleaseArrays(ctx);

    return GL_FALSE;
}

#define FALLBACK_IF(expr)                                       \
    do {                                                        \
        if (expr) {                                             \
            if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)            \
                WARN_ONCE("Software fallback:%s\n",             \
                          #expr);                               \
            return R300_FALLBACK_RAST;                          \
        }                                                       \
    } while (0)

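/**
 * \brief Check for state the hardware rasterizer cannot handle.
 *
 * Returns R300_FALLBACK_RAST if software rasterization is required and
 * R300_FALLBACK_NONE otherwise.
 */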
static int r300Fallback(GLcontext * ctx)
{
    r300ContextPtr r300 = R300_CONTEXT(ctx);
    /* Do we need to use new-style shaders?
     * Also is there a better way to do this? */
    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
        struct r500_fragment_program *fp = (struct r500_fragment_program *)
            (char *)ctx->FragmentProgram._Current;
        if (fp) {
            if (!fp->translated) {
                r500TranslateFragmentShader(r300, fp);
                FALLBACK_IF(!fp->translated);
            }
        }
    } else {
        struct r300_fragment_program *fp = (struct r300_fragment_program *)
            (char *)ctx->FragmentProgram._Current;
        if (fp) {
            if (!fp->translated) {
                r300TranslateFragmentShader(r300, fp);
                FALLBACK_IF(!fp->translated);
            }
        }
    }

    FALLBACK_IF(ctx->RenderMode != GL_RENDER);

    FALLBACK_IF(ctx->Stencil._TestTwoSide
                && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
                    || ctx->Stencil.ValueMask[0] !=
                    ctx->Stencil.ValueMask[1]
                    || ctx->Stencil.WriteMask[0] !=
                    ctx->Stencil.WriteMask[1]));

    if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
        FALLBACK_IF(ctx->Point.PointSprite);

    if (!r300->disable_lowimpact_fallback) {
        FALLBACK_IF(ctx->Polygon.StippleFlag);
        FALLBACK_IF(ctx->Multisample._Enabled);
        FALLBACK_IF(ctx->Line.StippleFlag);
        FALLBACK_IF(ctx->Line.SmoothFlag);
        FALLBACK_IF(ctx->Point.SmoothFlag);
    }

    return R300_FALLBACK_NONE;
}

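/**
 * \brief Render stage callback: hardware rasterization without hardware TCL.
 *
 * Returns GL_TRUE to pass the batch on to software rasterization when
 * r300Fallback() requests it or when the RADEON_CHIPSET_TCL flag is not set;
 * otherwise renders via r300RunRender().
 */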
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
                                     struct tnl_pipeline_stage *stage)
{
    r300ContextPtr rmesa = R300_CONTEXT(ctx);

    if (RADEON_DEBUG & DEBUG_PRIMS)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
        return GL_TRUE;

    if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
        return GL_TRUE;

    return r300RunRender(ctx, stage);
}

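/**
 * \brief Render stage callback: full hardware transform, clipping and lighting.
 *
 * Disables the hardware TCL path and passes the batch to the software stages
 * when TCL is turned off, a TCL-level fallback is required, or the current
 * vertex program is not native.
 */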
static GLboolean r300RunTCLRender(GLcontext * ctx,
                                  struct tnl_pipeline_stage *stage)
{
    r300ContextPtr rmesa = R300_CONTEXT(ctx);
    struct r300_vertex_program *vp;

    hw_tcl_on = future_hw_tcl_on;

    if (RADEON_DEBUG & DEBUG_PRIMS)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (hw_tcl_on == GL_FALSE)
        return GL_TRUE;

    if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
        hw_tcl_on = GL_FALSE;
        return GL_TRUE;
    }

    r300UpdateShaders(rmesa);

    vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
    if (vp->native == GL_FALSE) {
        hw_tcl_on = GL_FALSE;
        return GL_TRUE;
    }

    return r300RunRender(ctx, stage);
}

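/** TNL pipeline stage that rasterizes software-transformed vertices in hardware. */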
const struct tnl_pipeline_stage _r300_render_stage = {
    "r300 Hardware Rasterization",
    NULL,
    NULL,
    NULL,
    NULL,
    r300RunNonTCLRender
};

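/** TNL pipeline stage that performs transform, clipping and lighting in hardware. */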
const struct tnl_pipeline_stage _r300_tcl_stage = {
    "r300 Hardware Transform, Clipping and Lighting",
    NULL,
    NULL,
    NULL,
    NULL,
    r300RunTCLRender
};