Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into r600_state_predict
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "r600_context.h"
48 #include "r600_cmdbuf.h"
49
50 #include "r600_tex.h"
51
52 #include "r700_vertprog.h"
53 #include "r700_fragprog.h"
54 #include "r700_state.h"
55
56 #include "radeon_common_context.h"
57
58 void r700WaitForIdle(context_t *context);
59 void r700WaitForIdleClean(context_t *context);
60 GLboolean r700SendTextureState(context_t *context);
61 static unsigned int r700PrimitiveType(int prim);
62 void r600UpdateTextureState(GLcontext * ctx);
63 GLboolean r700SyncSurf(context_t *context,
64 struct radeon_bo *pbo,
65 uint32_t read_domain,
66 uint32_t write_domain,
67 uint32_t sync_type);
68
69 void r700WaitForIdle(context_t *context)
70 {
71 BATCH_LOCALS(&context->radeon);
72 BEGIN_BATCH_NO_AUTOSTATE(3);
73
74 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
75 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
76 R600_OUT_BATCH(WAIT_3D_IDLE_bit);
77
78 END_BATCH();
79 COMMIT_BATCH();
80 }
81
82 void r700WaitForIdleClean(context_t *context)
83 {
84 BATCH_LOCALS(&context->radeon);
85 BEGIN_BATCH_NO_AUTOSTATE(5);
86
87 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
88 R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
89
90 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
91 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
92 R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
93
94 END_BATCH();
95 COMMIT_BATCH();
96 }
97
98 void r700Start3D(context_t *context)
99 {
100 BATCH_LOCALS(&context->radeon);
101 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
102 {
103 BEGIN_BATCH_NO_AUTOSTATE(2);
104 R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
105 R600_OUT_BATCH(0);
106 END_BATCH();
107 }
108
109 BEGIN_BATCH_NO_AUTOSTATE(3);
110 R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
111 R600_OUT_BATCH(0x80000000);
112 R600_OUT_BATCH(0x80000000);
113 END_BATCH();
114
115 COMMIT_BATCH();
116
117 r700WaitForIdleClean(context);
118 }
119
120 GLboolean r700SyncSurf(context_t *context,
121 struct radeon_bo *pbo,
122 uint32_t read_domain,
123 uint32_t write_domain,
124 uint32_t sync_type)
125 {
126 BATCH_LOCALS(&context->radeon);
127 uint32_t cp_coher_size;
128
129 if (!pbo)
130 return GL_FALSE;
131
132 if (pbo->size == 0xffffffff)
133 cp_coher_size = 0xffffffff;
134 else
135 cp_coher_size = ((pbo->size + 255) >> 8);
136
137 BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
138 R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
139 R600_OUT_BATCH(sync_type);
140 R600_OUT_BATCH(cp_coher_size);
141 R600_OUT_BATCH(0);
142 R600_OUT_BATCH(10);
143 R600_OUT_BATCH_RELOC(0,
144 pbo,
145 0,
146 read_domain, write_domain, 0); // ???
147
148 END_BATCH();
149 COMMIT_BATCH();
150
151 return GL_TRUE;
152 }
153
154 static unsigned int r700PrimitiveType(int prim)
155 {
156 switch (prim & PRIM_MODE_MASK)
157 {
158 case GL_POINTS:
159 return DI_PT_POINTLIST;
160 break;
161 case GL_LINES:
162 return DI_PT_LINELIST;
163 break;
164 case GL_LINE_STRIP:
165 return DI_PT_LINESTRIP;
166 break;
167 case GL_LINE_LOOP:
168 return DI_PT_LINELOOP;
169 break;
170 case GL_TRIANGLES:
171 return DI_PT_TRILIST;
172 break;
173 case GL_TRIANGLE_STRIP:
174 return DI_PT_TRISTRIP;
175 break;
176 case GL_TRIANGLE_FAN:
177 return DI_PT_TRIFAN;
178 break;
179 case GL_QUADS:
180 return DI_PT_QUADLIST;
181 break;
182 case GL_QUAD_STRIP:
183 return DI_PT_QUADSTRIP;
184 break;
185 case GL_POLYGON:
186 return DI_PT_POLYGON;
187 break;
188 default:
189 assert(0);
190 return -1;
191 break;
192 }
193 }
194
195 static int r700NumVerts(int num_verts, int prim)
196 {
197 int verts_off = 0;
198
199 switch (prim & PRIM_MODE_MASK) {
200 case GL_POINTS:
201 verts_off = 0;
202 break;
203 case GL_LINES:
204 verts_off = num_verts % 2;
205 break;
206 case GL_LINE_STRIP:
207 if (num_verts < 2)
208 verts_off = num_verts;
209 break;
210 case GL_LINE_LOOP:
211 if (num_verts < 2)
212 verts_off = num_verts;
213 break;
214 case GL_TRIANGLES:
215 verts_off = num_verts % 3;
216 break;
217 case GL_TRIANGLE_STRIP:
218 if (num_verts < 3)
219 verts_off = num_verts;
220 break;
221 case GL_TRIANGLE_FAN:
222 if (num_verts < 3)
223 verts_off = num_verts;
224 break;
225 case GL_QUADS:
226 verts_off = num_verts % 4;
227 break;
228 case GL_QUAD_STRIP:
229 if (num_verts < 4)
230 verts_off = num_verts;
231 else
232 verts_off = num_verts % 2;
233 break;
234 case GL_POLYGON:
235 if (num_verts < 3)
236 verts_off = num_verts;
237 break;
238 default:
239 assert(0);
240 return -1;
241 break;
242 }
243
244 return num_verts - verts_off;
245 }
246
247 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
248 {
249 context_t *context = R700_CONTEXT(ctx);
250 BATCH_LOCALS(&context->radeon);
251 int type, i, total_emit;
252 int num_indices;
253 uint32_t vgt_draw_initiator = 0;
254 uint32_t vgt_index_type = 0;
255 uint32_t vgt_primitive_type = 0;
256 uint32_t vgt_num_indices = 0;
257
258 type = r700PrimitiveType(prim);
259 num_indices = r700NumVerts(end - start, prim);
260
261 if (type < 0 || num_indices <= 0)
262 return;
263
264 total_emit = 3 /* VGT_PRIMITIVE_TYPE */
265 + 2 /* VGT_INDEX_TYPE */
266 + 2 /* NUM_INSTANCES */
267 + num_indices + 3; /* DRAW_INDEX_IMMD */
268
269 BEGIN_BATCH_NO_AUTOSTATE(total_emit);
270 // prim
271 SETfield(vgt_primitive_type, type,
272 VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
273 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
274 R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
275 R600_OUT_BATCH(vgt_primitive_type);
276
277 // index type
278 SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
279 R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
280 R600_OUT_BATCH(vgt_index_type);
281
282 // num instances
283 R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
284 R600_OUT_BATCH(1);
285
286 // draw packet
287 vgt_num_indices = num_indices;
288 SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
289 SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
290
291 R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
292 R600_OUT_BATCH(vgt_num_indices);
293 R600_OUT_BATCH(vgt_draw_initiator);
294
295 for (i = start; i < (start + num_indices); i++) {
296 R600_OUT_BATCH(i);
297 }
298 END_BATCH();
299 COMMIT_BATCH();
300
301 }
302
/*
 * start 3d, idle, cb/db flush
 *
 * The three terms appear to match batch sizes used in this file:
 * 10 = r700Start3D (2 + 3) plus the r700WaitForIdleClean it calls (5),
 *  5 = one r700WaitForIdleClean,
 * 14 = two r700SyncSurf calls at 5 + 2 dwords each.
 *
 * The replacement list is parenthesized so the sum stays intact when the
 * macro is used inside a larger expression.
 */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)
305
306 static GLuint r700PredictRenderSize(GLcontext* ctx)
307 {
308 context_t *context = R700_CONTEXT(ctx);
309 TNLcontext *tnl = TNL_CONTEXT(ctx);
310 struct r700_vertex_program *vpc
311 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
312 struct vertex_buffer *vb = &tnl->vb;
313 GLboolean flushed;
314 GLuint dwords, i;
315 GLuint state_size;
316 /* pre calculate aos count so state prediction works */
317 context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead);
318
319 dwords = PRE_EMIT_STATE_BUFSZ;
320 for (i = 0; i < vb->PrimitiveCount; i++)
321 dwords += vb->Primitive[i].count + 10;
322 state_size = radeonCountStateEmitSize(&context->radeon);
323 flushed = rcommonEnsureCmdBufSpace(&context->radeon,
324 dwords + state_size, __FUNCTION__);
325
326 if (flushed)
327 dwords += radeonCountStateEmitSize(&context->radeon);
328 else
329 dwords += state_size;
330
331 if (RADEON_DEBUG & DEBUG_PRIMS)
332 fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
333 return dwords;
334 }
335
336 static GLboolean r700RunRender(GLcontext * ctx,
337 struct tnl_pipeline_stage *stage)
338 {
339 context_t *context = R700_CONTEXT(ctx);
340 radeonContextPtr radeon = &context->radeon;
341 unsigned int i, id = 0;
342 TNLcontext *tnl = TNL_CONTEXT(ctx);
343 struct vertex_buffer *vb = &tnl->vb;
344 struct radeon_renderbuffer *rrb;
345
346 if (RADEON_DEBUG & DEBUG_PRIMS)
347 fprintf(stderr, "%s: cs begin at %d\n",
348 __func__, context->radeon.cmdbuf.cs->cdw);
349
350 /* always emit CB base to prevent
351 * lock ups on some chips.
352 */
353 R600_STATECHANGE(context, cb_target);
354 /* mark vtx as dirty since it changes per-draw */
355 R600_STATECHANGE(context, vtx);
356
357 r700UpdateShaders(ctx);
358 r700SetScissor(context);
359 r700SetupVertexProgram(ctx);
360 r700SetupFragmentProgram(ctx);
361 r600UpdateTextureState(ctx);
362
363 GLuint emit_end = r700PredictRenderSize(ctx)
364 + context->radeon.cmdbuf.cs->cdw;
365 r700SetupStreams(ctx);
366
367 radeonEmitState(radeon);
368
369 /* richard test code */
370 for (i = 0; i < vb->PrimitiveCount; i++) {
371 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
372 GLuint start = vb->Primitive[i].start;
373 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
374 r700RunRenderPrimitive(ctx, start, end, prim);
375 }
376
377 /* Flush render op cached for last several quads. */
378 r700WaitForIdleClean(context);
379
380 rrb = radeon_get_colorbuffer(&context->radeon);
381 if (rrb && rrb->bo)
382 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
383 CB_ACTION_ENA_bit | (1 << (id + 6)));
384
385 rrb = radeon_get_depthbuffer(&context->radeon);
386 if (rrb && rrb->bo)
387 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
388 DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
389
390 radeonReleaseArrays(ctx, ~0);
391
392 if (RADEON_DEBUG & DEBUG_PRIMS)
393 fprintf(stderr, "%s: cs end at %d\n",
394 __func__, context->radeon.cmdbuf.cs->cdw);
395
396 if ( emit_end < context->radeon.cmdbuf.cs->cdw )
397 WARN_ONCE("Rendering was %d commands larger than predicted size."
398 " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
399
400 return GL_FALSE;
401 }
402
403 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
404 struct tnl_pipeline_stage *stage) /* -------------------- */
405 {
406 GLboolean bRet = GL_TRUE;
407
408 return bRet;
409 }
410
411 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
412 struct tnl_pipeline_stage *stage)
413 {
414 GLboolean bRet = GL_FALSE;
415
416 /* TODO : sw fallback */
417
418 /**
419 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
420 */
421 if(!r600ValidateBuffers(ctx))
422 {
423 return GL_TRUE;
424 }
425
426 bRet = r700RunRender(ctx, stage);
427
428 return bRet;
429 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
430 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
431 }
432
433 const struct tnl_pipeline_stage _r700_render_stage = {
434 "r700 Hardware Rasterization",
435 NULL,
436 NULL,
437 NULL,
438 NULL,
439 r700RunNonTCLRender
440 };
441
442 const struct tnl_pipeline_stage _r700_tcl_stage = {
443 "r700 Hardware Transform, Clipping and Lighting",
444 NULL,
445 NULL,
446 NULL,
447 NULL,
448 r700RunTCLRender
449 };
450
451 const struct tnl_pipeline_stage *r700_pipeline[] =
452 {
453 &_r700_tcl_stage,
454 &_tnl_vertex_transform_stage,
455 &_tnl_normal_transform_stage,
456 &_tnl_lighting_stage,
457 &_tnl_fog_coordinate_stage,
458 &_tnl_texgen_stage,
459 &_tnl_texture_transform_stage,
460 &_tnl_vertex_program_stage,
461
462 &_r700_render_stage,
463 &_tnl_render_stage,
464 0,
465 };
466
467