r600: Improve emit prediction.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "r600_context.h"
48 #include "r600_cmdbuf.h"
49
50 #include "r600_tex.h"
51
52 #include "r700_vertprog.h"
53 #include "r700_fragprog.h"
54 #include "r700_state.h"
55
56 void r700WaitForIdle(context_t *context);
57 void r700WaitForIdleClean(context_t *context);
58 GLboolean r700SendTextureState(context_t *context);
59 static unsigned int r700PrimitiveType(int prim);
60 void r600UpdateTextureState(GLcontext * ctx);
61 GLboolean r700SyncSurf(context_t *context,
62 struct radeon_bo *pbo,
63 uint32_t read_domain,
64 uint32_t write_domain,
65 uint32_t sync_type);
66
67 void r700WaitForIdle(context_t *context)
68 {
69 BATCH_LOCALS(&context->radeon);
70 BEGIN_BATCH_NO_AUTOSTATE(3);
71
72 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
73 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
74 R600_OUT_BATCH(WAIT_3D_IDLE_bit);
75
76 END_BATCH();
77 COMMIT_BATCH();
78 }
79
80 void r700WaitForIdleClean(context_t *context)
81 {
82 BATCH_LOCALS(&context->radeon);
83 BEGIN_BATCH_NO_AUTOSTATE(5);
84
85 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
86 R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
87
88 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
89 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
90 R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
91
92 END_BATCH();
93 COMMIT_BATCH();
94 }
95
96 void r700Start3D(context_t *context)
97 {
98 BATCH_LOCALS(&context->radeon);
99 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
100 {
101 BEGIN_BATCH_NO_AUTOSTATE(2);
102 R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
103 R600_OUT_BATCH(0);
104 END_BATCH();
105 }
106
107 BEGIN_BATCH_NO_AUTOSTATE(3);
108 R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
109 R600_OUT_BATCH(0x80000000);
110 R600_OUT_BATCH(0x80000000);
111 END_BATCH();
112
113 COMMIT_BATCH();
114
115 r700WaitForIdleClean(context);
116 }
117
118 GLboolean r700SyncSurf(context_t *context,
119 struct radeon_bo *pbo,
120 uint32_t read_domain,
121 uint32_t write_domain,
122 uint32_t sync_type)
123 {
124 BATCH_LOCALS(&context->radeon);
125 uint32_t cp_coher_size;
126
127 if (!pbo)
128 return GL_FALSE;
129
130 if (pbo->size == 0xffffffff)
131 cp_coher_size = 0xffffffff;
132 else
133 cp_coher_size = ((pbo->size + 255) >> 8);
134
135 BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
136 R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
137 R600_OUT_BATCH(sync_type);
138 R600_OUT_BATCH(cp_coher_size);
139 R600_OUT_BATCH(0);
140 R600_OUT_BATCH(10);
141 R600_OUT_BATCH_RELOC(0,
142 pbo,
143 0,
144 read_domain, write_domain, 0); // ???
145
146 END_BATCH();
147 COMMIT_BATCH();
148
149 return GL_TRUE;
150 }
151
152 static unsigned int r700PrimitiveType(int prim)
153 {
154 switch (prim & PRIM_MODE_MASK)
155 {
156 case GL_POINTS:
157 return DI_PT_POINTLIST;
158 break;
159 case GL_LINES:
160 return DI_PT_LINELIST;
161 break;
162 case GL_LINE_STRIP:
163 return DI_PT_LINESTRIP;
164 break;
165 case GL_LINE_LOOP:
166 return DI_PT_LINELOOP;
167 break;
168 case GL_TRIANGLES:
169 return DI_PT_TRILIST;
170 break;
171 case GL_TRIANGLE_STRIP:
172 return DI_PT_TRISTRIP;
173 break;
174 case GL_TRIANGLE_FAN:
175 return DI_PT_TRIFAN;
176 break;
177 case GL_QUADS:
178 return DI_PT_QUADLIST;
179 break;
180 case GL_QUAD_STRIP:
181 return DI_PT_QUADSTRIP;
182 break;
183 case GL_POLYGON:
184 return DI_PT_POLYGON;
185 break;
186 default:
187 assert(0);
188 return -1;
189 break;
190 }
191 }
192
193 static int r700NumVerts(int num_verts, int prim)
194 {
195 int verts_off = 0;
196
197 switch (prim & PRIM_MODE_MASK) {
198 case GL_POINTS:
199 verts_off = 0;
200 break;
201 case GL_LINES:
202 verts_off = num_verts % 2;
203 break;
204 case GL_LINE_STRIP:
205 if (num_verts < 2)
206 verts_off = num_verts;
207 break;
208 case GL_LINE_LOOP:
209 if (num_verts < 2)
210 verts_off = num_verts;
211 break;
212 case GL_TRIANGLES:
213 verts_off = num_verts % 3;
214 break;
215 case GL_TRIANGLE_STRIP:
216 if (num_verts < 3)
217 verts_off = num_verts;
218 break;
219 case GL_TRIANGLE_FAN:
220 if (num_verts < 3)
221 verts_off = num_verts;
222 break;
223 case GL_QUADS:
224 verts_off = num_verts % 4;
225 break;
226 case GL_QUAD_STRIP:
227 if (num_verts < 4)
228 verts_off = num_verts;
229 else
230 verts_off = num_verts % 2;
231 break;
232 case GL_POLYGON:
233 if (num_verts < 3)
234 verts_off = num_verts;
235 break;
236 default:
237 assert(0);
238 return -1;
239 break;
240 }
241
242 return num_verts - verts_off;
243 }
244
245 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
246 {
247 context_t *context = R700_CONTEXT(ctx);
248 BATCH_LOCALS(&context->radeon);
249 int type, i, total_emit;
250 int num_indices;
251 uint32_t vgt_draw_initiator = 0;
252 uint32_t vgt_index_type = 0;
253 uint32_t vgt_primitive_type = 0;
254 uint32_t vgt_num_indices = 0;
255
256 type = r700PrimitiveType(prim);
257 num_indices = r700NumVerts(end - start, prim);
258
259 if (type < 0 || num_indices <= 0)
260 return;
261
262 total_emit = 3 /* VGT_PRIMITIVE_TYPE */
263 + 2 /* VGT_INDEX_TYPE */
264 + 2 /* NUM_INSTANCES */
265 + num_indices + 3; /* DRAW_INDEX_IMMD */
266
267 BEGIN_BATCH_NO_AUTOSTATE(total_emit);
268 // prim
269 SETfield(vgt_primitive_type, type,
270 VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
271 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
272 R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
273 R600_OUT_BATCH(vgt_primitive_type);
274
275 // index type
276 SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
277 R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
278 R600_OUT_BATCH(vgt_index_type);
279
280 // num instances
281 R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
282 R600_OUT_BATCH(1);
283
284 // draw packet
285 vgt_num_indices = num_indices;
286 SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
287 SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
288
289 R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
290 R600_OUT_BATCH(vgt_num_indices);
291 R600_OUT_BATCH(vgt_draw_initiator);
292
293 for (i = start; i < (start + num_indices); i++) {
294 R600_OUT_BATCH(i);
295 }
296 END_BATCH();
297 COMMIT_BATCH();
298
299 }
300
/*
 * Dwords reserved on top of the per-primitive and state estimates:
 * start 3d, idle, cb/db flush.
 *
 * The sum is parenthesized so the macro expands to a single value even
 * when it is used next to a higher-precedence operator (for example
 * PRE_EMIT_STATE_BUFSZ * 2).
 */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)
303
304 static GLuint r700PredictRenderSize(GLcontext* ctx)
305 {
306 context_t *context = R700_CONTEXT(ctx);
307 TNLcontext *tnl = TNL_CONTEXT(ctx);
308 struct r700_vertex_program *vpc
309 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
310 struct vertex_buffer *vb = &tnl->vb;
311 GLboolean flushed;
312 GLuint dwords, i;
313 GLuint state_size;
314 /* pre calculate aos count so state prediction works */
315 context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead);
316
317 dwords = PRE_EMIT_STATE_BUFSZ;
318 for (i = 0; i < vb->PrimitiveCount; i++)
319 dwords += vb->Primitive[i].count + 10;
320 state_size = radeonCountStateEmitSize(&context->radeon);
321 flushed = rcommonEnsureCmdBufSpace(&context->radeon,
322 dwords + state_size, __FUNCTION__);
323
324 if (flushed)
325 dwords += radeonCountStateEmitSize(&context->radeon);
326 else
327 dwords += state_size;
328
329 if (RADEON_DEBUG & DEBUG_PRIMS)
330 fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
331 return dwords;
332 }
333
334 static GLboolean r700RunRender(GLcontext * ctx,
335 struct tnl_pipeline_stage *stage)
336 {
337 context_t *context = R700_CONTEXT(ctx);
338 radeonContextPtr radeon = &context->radeon;
339 unsigned int i, id = 0;
340 TNLcontext *tnl = TNL_CONTEXT(ctx);
341 struct vertex_buffer *vb = &tnl->vb;
342 struct radeon_renderbuffer *rrb;
343
344 if (RADEON_DEBUG & DEBUG_PRIMS)
345 fprintf(stderr, "%s: cs begin at %d\n",
346 __func__, context->radeon.cmdbuf.cs->cdw);
347
348 r700UpdateShaders(ctx);
349 r700SetScissor(context);
350 r700SetupVertexProgram(ctx);
351 r700SetupFragmentProgram(ctx);
352 r600UpdateTextureState(ctx);
353
354 GLuint emit_end = r700PredictRenderSize(ctx)
355 + context->radeon.cmdbuf.cs->cdw;
356 r700SetupStreams(ctx);
357
358 radeonEmitState(radeon);
359
360 /* richard test code */
361 for (i = 0; i < vb->PrimitiveCount; i++) {
362 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
363 GLuint start = vb->Primitive[i].start;
364 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
365 r700RunRenderPrimitive(ctx, start, end, prim);
366 }
367
368 /* Flush render op cached for last several quads. */
369 r700WaitForIdleClean(context);
370
371 rrb = radeon_get_colorbuffer(&context->radeon);
372 if (!rrb || !rrb->bo)
373 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
374 CB_ACTION_ENA_bit | (1 << (id + 6)));
375
376 rrb = radeon_get_depthbuffer(&context->radeon);
377 if (!rrb || !rrb->bo)
378 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
379 DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
380
381 radeonReleaseArrays(ctx, ~0);
382 assert(context->radeon.cmdbuf.cs->cdw <= emit_end);
383
384 return GL_FALSE;
385 }
386
387 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
388 struct tnl_pipeline_stage *stage) /* -------------------- */
389 {
390 GLboolean bRet = GL_TRUE;
391
392 return bRet;
393 }
394
395 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
396 struct tnl_pipeline_stage *stage)
397 {
398 GLboolean bRet = GL_FALSE;
399
400 /* TODO : sw fallback */
401
402 /**
403 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
404 */
405 if(!r600ValidateBuffers(ctx))
406 {
407 return GL_TRUE;
408 }
409
410 bRet = r700RunRender(ctx, stage);
411
412 return bRet;
413 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
414 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
415 }
416
417 const struct tnl_pipeline_stage _r700_render_stage = {
418 "r700 Hardware Rasterization",
419 NULL,
420 NULL,
421 NULL,
422 NULL,
423 r700RunNonTCLRender
424 };
425
426 const struct tnl_pipeline_stage _r700_tcl_stage = {
427 "r700 Hardware Transform, Clipping and Lighting",
428 NULL,
429 NULL,
430 NULL,
431 NULL,
432 r700RunTCLRender
433 };
434
435 const struct tnl_pipeline_stage *r700_pipeline[] =
436 {
437 &_r700_tcl_stage,
438 &_tnl_vertex_transform_stage,
439 &_tnl_normal_transform_stage,
440 &_tnl_lighting_stage,
441 &_tnl_fog_coordinate_stage,
442 &_tnl_texgen_stage,
443 &_tnl_texture_transform_stage,
444 &_tnl_vertex_program_stage,
445
446 &_r700_render_stage,
447 &_tnl_render_stage,
448 0,
449 };
450
451