/*
 * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * Authors:
 *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"

#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"

#include "r600_context.h"
#include "r600_cmdbuf.h"

#include "r600_tex.h"

#include "r700_vertprog.h"
#include "r700_fragprog.h"
#include "r700_state.h"

#include "radeon_common_context.h"

void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
GLboolean r700SendTextureState(context_t *context);
/* returns -1 on failure, so the return type must be signed */
static int r700PrimitiveType(int prim);
void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type);

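/* Emit a WAIT_UNTIL with WAIT_3D_IDLE set so the CP stalls until the
 * 3D engine has drained.
 */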
void r700WaitForIdle(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    BEGIN_BATCH_NO_AUTOSTATE(3);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit);

    END_BATCH();
    COMMIT_BATCH();
}

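/* Flush and invalidate the 3D caches with an event write, then wait
 * until the engine is both idle and clean.
 */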
void r700WaitForIdleClean(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    BEGIN_BATCH_NO_AUTOSTATE(5);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);

    END_BATCH();
    COMMIT_BATCH();
}

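/* Begin a 3D command stream: emit START_3D_CMDBUF on pre-RV770 parts,
 * reinitialize context control, and leave the engine idle and clean
 * before state emission starts.
 */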
void r700Start3D(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        BEGIN_BATCH_NO_AUTOSTATE(2);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
        R600_OUT_BATCH(0);
        END_BATCH();
    }

    BEGIN_BATCH_NO_AUTOSTATE(3);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
    R600_OUT_BATCH(0x80000000);
    R600_OUT_BATCH(0x80000000);
    END_BATCH();

    COMMIT_BATCH();

    r700WaitForIdleClean(context);
}

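/* Emit a SURFACE_SYNC packet for the given buffer object so previously
 * written data is coherent before the next consumer touches it.
 * sync_type carries the CP_COHER_CNTL action/destination bits.
 */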
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type)
{
    BATCH_LOCALS(&context->radeon);
    uint32_t cp_coher_size;

    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (!pbo)
        return GL_FALSE;

    /* CP_COHER_SIZE is in units of 256 bytes */
    if (pbo->size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((pbo->size + 255) >> 8);

    BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
    R600_OUT_BATCH(sync_type);      /* CP_COHER_CNTL */
    R600_OUT_BATCH(cp_coher_size);  /* CP_COHER_SIZE */
    R600_OUT_BATCH(0);              /* CP_COHER_BASE */
    R600_OUT_BATCH(10);             /* poll interval */
    R600_OUT_BATCH_RELOC(0,
                         pbo,
                         0,
                         read_domain, write_domain, 0);
    END_BATCH();
    COMMIT_BATCH();

    return GL_TRUE;
}

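/* Translate a TNL primitive mode (GL enum) into the VGT DI_PT_* draw
 * initiator primitive type.  Returns -1 for unknown modes.
 */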
static int r700PrimitiveType(int prim)
{
    switch (prim & PRIM_MODE_MASK)
    {
    case GL_POINTS:
        return DI_PT_POINTLIST;
    case GL_LINES:
        return DI_PT_LINELIST;
    case GL_LINE_STRIP:
        return DI_PT_LINESTRIP;
    case GL_LINE_LOOP:
        return DI_PT_LINELOOP;
    case GL_TRIANGLES:
        return DI_PT_TRILIST;
    case GL_TRIANGLE_STRIP:
        return DI_PT_TRISTRIP;
    case GL_TRIANGLE_FAN:
        return DI_PT_TRIFAN;
    case GL_QUADS:
        return DI_PT_QUADLIST;
    case GL_QUAD_STRIP:
        return DI_PT_QUADSTRIP;
    case GL_POLYGON:
        return DI_PT_POLYGON;
    default:
        assert(0);
        return -1;
    }
}

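/* Clamp a vertex count to what the primitive type can actually consume
 * (e.g. a multiple of 3 for GL_TRIANGLES), discarding trailing vertices
 * that cannot form a complete primitive.
 */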
static int r700NumVerts(int num_verts, int prim)
{
    int verts_off = 0;

    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        verts_off = 0;
        break;
    case GL_LINES:
        verts_off = num_verts % 2;
        break;
    case GL_LINE_STRIP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_LINE_LOOP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_TRIANGLES:
        verts_off = num_verts % 3;
        break;
    case GL_TRIANGLE_STRIP:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_TRIANGLE_FAN:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_QUADS:
        verts_off = num_verts % 4;
        break;
    case GL_QUAD_STRIP:
        if (num_verts < 4)
            verts_off = num_verts;
        else
            verts_off = num_verts % 2;
        break;
    case GL_POLYGON:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    default:
        assert(0);
        return -1;
    }

    return num_verts - verts_off;
}

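/* Emit one primitive as an immediate-mode draw: set VGT_PRIMITIVE_TYPE
 * and the index type, then stream the indices (vb->Elts when indexed,
 * otherwise a linear sequence) inside a DRAW_INDEX_IMMD packet.
 */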
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
    context_t *context = R700_CONTEXT(ctx);
    BATCH_LOCALS(&context->radeon);
    int type, i, total_emit;
    int num_indices;
    uint32_t vgt_draw_initiator = 0;
    uint32_t vgt_index_type = 0;
    uint32_t vgt_primitive_type = 0;
    uint32_t vgt_num_indices = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    type = r700PrimitiveType(prim);
    num_indices = r700NumVerts(end - start, prim);

    radeon_print(RADEON_RENDER, RADEON_TRACE,
                 "%s type %x num_indices %d\n",
                 __func__, type, num_indices);

    if (type < 0 || num_indices <= 0)
        return;

    total_emit = 3  /* VGT_PRIMITIVE_TYPE */
               + 2  /* VGT_INDEX_TYPE */
               + 2  /* NUM_INSTANCES */
               + num_indices + 3;  /* DRAW_INDEX_IMMD */

    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
    /* prim */
    SETfield(vgt_primitive_type, type,
             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(vgt_primitive_type);

    /* index type */
    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
    R600_OUT_BATCH(vgt_index_type);

    /* num instances */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
    R600_OUT_BATCH(1);

    /* draw packet */
    vgt_num_indices = num_indices;
    SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
    R600_OUT_BATCH(vgt_num_indices);
    R600_OUT_BATCH(vgt_draw_initiator);

    for (i = start; i < (start + num_indices); i++) {
        if (vb->Elts)
            R600_OUT_BATCH(vb->Elts[i]);
        else
            R600_OUT_BATCH(i);
    }
    END_BATCH();
    COMMIT_BATCH();
}

/* start 3d (10), idle (5), cb/db flush (14) */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)

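/* Estimate the number of command-buffer dwords this draw will need
 * (fixed overhead plus per-primitive cost plus state), and make sure
 * the buffer has room, flushing first if necessary.
 */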
static GLuint r700PredictRenderSize(GLcontext* ctx)
{
    context_t *context = R700_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct r700_vertex_program *vp = context->selected_vp;
    struct vertex_buffer *vb = &tnl->vb;
    GLboolean flushed;
    GLuint dwords, i;
    GLuint state_size;

    /* precalculate the aos count so state-size prediction works */
    context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);

    dwords = PRE_EMIT_STATE_BUFSZ;
    for (i = 0; i < vb->PrimitiveCount; i++)
        dwords += vb->Primitive[i].count + 10;
    state_size = radeonCountStateEmitSize(&context->radeon);
    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
                                       dwords + state_size, __func__);

    /* a flush dirties all state, so recount its emit size */
    if (flushed)
        dwords += radeonCountStateEmitSize(&context->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s: total prediction size is %d.\n", __func__, dwords);
    return dwords;
}

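/* Top of the hardware draw path: validate and emit GPU state, walk the
 * TNL primitive list drawing each primitive, then emit the cache
 * flushes the color and depth buffers need.
 */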
static GLboolean r700RunRender(GLcontext * ctx,
                               struct tnl_pipeline_stage *stage)
{
    context_t *context = R700_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    unsigned int i, id = 0;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;
    struct radeon_renderbuffer *rrb;
    GLuint emit_end;

    radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    /* always emit CB base to prevent
     * lock ups on some chips
     */
    R600_STATECHANGE(context, cb_target);
    /* mark vtx as dirty since it changes per-draw */
    R600_STATECHANGE(context, vtx);

    r700SetScissor(context);
    r700SetupVertexProgram(ctx);
    r700SetupFragmentProgram(ctx);
    r600UpdateTextureState(ctx);

    emit_end = r700PredictRenderSize(ctx)
             + context->radeon.cmdbuf.cs->cdw;
    r700SetupStreams(ctx);

    radeonEmitState(radeon);

    radeon_debug_add_indent();
    for (i = 0; i < vb->PrimitiveCount; i++) {
        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
        GLuint start = vb->Primitive[i].start;
        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
        r700RunRenderPrimitive(ctx, start, end, prim);
    }
    radeon_debug_remove_indent();

    /* Flush render ops cached for the last several quads. */
    r700WaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    radeonReleaseArrays(ctx, ~0);

    radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
                 __func__, context->radeon.cmdbuf.cs->cdw);

    if (emit_end < context->radeon.cmdbuf.cs->cdw)
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  context->radeon.cmdbuf.cs->cdw - emit_end);

    return GL_FALSE;
}

static GLboolean r700RunNonTCLRender(GLcontext * ctx,
                                     struct tnl_pipeline_stage *stage)
{
    GLboolean bRet = GL_TRUE;

    return bRet;
}

static GLboolean r700RunTCLRender(GLcontext * ctx,
                                  struct tnl_pipeline_stage *stage)
{
    GLboolean bRet = GL_FALSE;

    /* TODO : sw fallback */

    /* Need the shader BOs set up before the buffer check. */
    r700UpdateShaders(ctx);

    /* Ensure all enabled and complete textures are uploaded, along
     * with any buffers being used.
     */
    if (!r600ValidateBuffers(ctx))
    {
        return GL_TRUE;
    }

    bRet = r700RunRender(ctx, stage);

    /* GL_FALSE stops _tnl_run_pipeline from running the remaining
     * pipeline stages.  The render above finishes the whole pipeline,
     * so GL_FALSE is the success return here.
     */
    return bRet;
}

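/* TNL pipeline stage descriptors.  Only the run callback is used by
 * these stages; the remaining hooks are unneeded and left NULL.
 */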
const struct tnl_pipeline_stage _r700_render_stage = {
    "r700 Hardware Rasterization",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunNonTCLRender
};

const struct tnl_pipeline_stage _r700_tcl_stage = {
    "r700 Hardware Transform, Clipping and Lighting",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunTCLRender
};

const struct tnl_pipeline_stage *r700_pipeline[] =
{
    &_r700_tcl_stage,
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
    &_tnl_vertex_program_stage,

    &_r700_render_stage,
    &_tnl_render_stage,
    NULL,
};