r600: use AUTO_INDEX for draw - saves cmd buffer space
src/mesa/drivers/dri/r600/r700_render.c
/*
 * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * Authors:
 *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"

#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
#include "vbo/vbo_context.h"

#include "r600_context.h"
#include "r600_cmdbuf.h"

#include "r600_tex.h"

#include "r700_vertprog.h"
#include "r700_fragprog.h"
#include "r700_state.h"

#include "radeon_buffer_objects.h"
#include "radeon_common_context.h"

void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
GLboolean r700SendTextureState(context_t *context);
static unsigned int r700PrimitiveType(int prim);
void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type);

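/* The idle helpers below appear to stall the command processor until the
 * 3D engine drains.  As emitted here, a SET_CONFIG_REG packet is three
 * dwords: the PACKET3 header, the register offset relative to
 * ASIC_CONFIG_BASE_INDEX, and the value to write (WAIT_UNTIL bits).
 */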
void r700WaitForIdle(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    BEGIN_BATCH_NO_AUTOSTATE(3);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit);

    END_BATCH();
    COMMIT_BATCH();
}

void r700WaitForIdleClean(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    BEGIN_BATCH_NO_AUTOSTATE(5);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);

    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);

    END_BATCH();
    COMMIT_BATCH();
}

void r700Start3D(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        BEGIN_BATCH_NO_AUTOSTATE(2);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
        R600_OUT_BATCH(0);
        END_BATCH();
    }

    BEGIN_BATCH_NO_AUTOSTATE(3);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
    R600_OUT_BATCH(0x80000000);
    R600_OUT_BATCH(0x80000000);
    END_BATCH();

    COMMIT_BATCH();

    r700WaitForIdleClean(context);
}

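/* SURFACE_SYNC flushes/invalidates the caches selected by sync_type for
 * one buffer.  CP_COHER_SIZE is expressed in 256-byte units, hence the
 * (size + 255) >> 8 rounding below; e.g. a 1000-byte BO covers
 * (1000 + 255) >> 8 = 4 units.  The dword written just before the reloc
 * is the CP poll interval.
 */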
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type)
{
    BATCH_LOCALS(&context->radeon);
    uint32_t cp_coher_size;

    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);

    if (!pbo)
        return GL_FALSE;

    if (pbo->size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((pbo->size + 255) >> 8);

    BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
    R600_OUT_BATCH(sync_type);
    R600_OUT_BATCH(cp_coher_size);
    R600_OUT_BATCH(0);  /* base address, patched by the reloc below */
    R600_OUT_BATCH(10); /* poll interval */
    R600_OUT_BATCH_RELOC(0,
                         pbo,
                         0,
                         read_domain, write_domain, 0);
    END_BATCH();
    COMMIT_BATCH();

    return GL_TRUE;
}

static unsigned int r700PrimitiveType(int prim)
{
    switch (prim & PRIM_MODE_MASK)
    {
    case GL_POINTS:
        return DI_PT_POINTLIST;
    case GL_LINES:
        return DI_PT_LINELIST;
    case GL_LINE_STRIP:
        return DI_PT_LINESTRIP;
    case GL_LINE_LOOP:
        return DI_PT_LINELOOP;
    case GL_TRIANGLES:
        return DI_PT_TRILIST;
    case GL_TRIANGLE_STRIP:
        return DI_PT_TRISTRIP;
    case GL_TRIANGLE_FAN:
        return DI_PT_TRIFAN;
    case GL_QUADS:
        return DI_PT_QUADLIST;
    case GL_QUAD_STRIP:
        return DI_PT_QUADSTRIP;
    case GL_POLYGON:
        return DI_PT_POLYGON;
    default:
        assert(0);
        return -1;
    }
}

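/* Clamp the vertex count to a whole number of primitives: e.g. 8 vertices
 * drawn as GL_TRIANGLES yield 8 - (8 % 3) = 6 usable vertices, and a
 * 5-vertex GL_QUAD_STRIP is cut back to 4.
 */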
static int r700NumVerts(int num_verts, int prim)
{
    int verts_off = 0;

    switch (prim & PRIM_MODE_MASK) {
    case GL_POINTS:
        verts_off = 0;
        break;
    case GL_LINES:
        verts_off = num_verts % 2;
        break;
    case GL_LINE_STRIP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_LINE_LOOP:
        if (num_verts < 2)
            verts_off = num_verts;
        break;
    case GL_TRIANGLES:
        verts_off = num_verts % 3;
        break;
    case GL_TRIANGLE_STRIP:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_TRIANGLE_FAN:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    case GL_QUADS:
        verts_off = num_verts % 4;
        break;
    case GL_QUAD_STRIP:
        if (num_verts < 4)
            verts_off = num_verts;
        else
            verts_off = num_verts % 2;
        break;
    case GL_POLYGON:
        if (num_verts < 3)
            verts_off = num_verts;
        break;
    default:
        assert(0);
        return -1;
    }

    return num_verts - verts_off;
}

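/* Emit a single draw.  Two paths, sized to match total_emit below:
 *
 *   indexed:     SET_CONFIG_REG(VGT_PRIMITIVE_TYPE)  3 dwords
 *                INDEX_TYPE                          2
 *                NUM_INSTANCES                       2
 *                VGT_INDX_OFFSET                     3
 *                DRAW_INDEX + reloc              5 + 2   = 17
 *
 *   non-indexed: the same preamble, but a 3-dword DRAW_INDEX_AUTO
 *                with DI_SRC_SEL_AUTO_INDEX              = 13
 *
 * The AUTO_INDEX path is the point of this commit: the VGT generates
 * sequential indices itself, so no index upload or reloc is needed.
 */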
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
    context_t *context = R700_CONTEXT(ctx);
    BATCH_LOCALS(&context->radeon);
    int type, total_emit;
    int num_indices;
    uint32_t vgt_draw_initiator = 0;
    uint32_t vgt_index_type     = 0;
    uint32_t vgt_primitive_type = 0;
    uint32_t vgt_num_indices    = 0;
    GLboolean bUseDrawIndex;

    if (NULL != context->ind_buf.bo)
    {
        bUseDrawIndex = GL_TRUE;
    }
    else
    {
        bUseDrawIndex = GL_FALSE;
    }

    type = r700PrimitiveType(prim);
    num_indices = r700NumVerts(end - start, prim);

    radeon_print(RADEON_RENDER, RADEON_TRACE,
                 "%s type %x num_indices %d\n",
                 __func__, type, num_indices);

    if (type < 0 || num_indices <= 0)
        return;

    if (GL_TRUE == bUseDrawIndex)
    {
        total_emit = 3   /* VGT_PRIMITIVE_TYPE */
                   + 2   /* VGT_INDEX_TYPE */
                   + 2   /* NUM_INSTANCES */
                   + 3   /* VGT_INDEX_OFFSET */
                   + 5 + 2;  /* DRAW_INDEX + reloc */
    }
    else
    {
        total_emit = 3   /* VGT_PRIMITIVE_TYPE */
                   + 2   /* VGT_INDEX_TYPE */
                   + 2   /* NUM_INSTANCES */
                   + 3   /* VGT_INDEX_OFFSET */
                   + 3;  /* DRAW_INDEX_AUTO */
    }

    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
    // prim
    SETfield(vgt_primitive_type, type,
             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(vgt_primitive_type);

    // index type
    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);

    if (GL_TRUE == bUseDrawIndex)
    {
        if (GL_TRUE != context->ind_buf.is_32bit)
        {
            SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
        }
    }

    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
    R600_OUT_BATCH(vgt_index_type);

    // num instances
    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
    R600_OUT_BATCH(1);

    // draw packet
    vgt_num_indices = num_indices;

    if (GL_TRUE == bUseDrawIndex)
    {
        SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
    }
    else
    {
        SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
    }

    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);

    if (GL_TRUE == bUseDrawIndex)
    {
        R600_OUT_BATCH_REGSEQ(VGT_INDX_OFFSET, 1);
        R600_OUT_BATCH(0);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
        R600_OUT_BATCH(context->ind_buf.bo_offset);
        R600_OUT_BATCH(0);
        R600_OUT_BATCH(vgt_num_indices);
        R600_OUT_BATCH(vgt_draw_initiator);
        R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
                             context->ind_buf.bo,
                             context->ind_buf.bo_offset,
                             RADEON_GEM_DOMAIN_GTT, 0, 0);
    }
    else
    {
        R600_OUT_BATCH_REGSEQ(VGT_INDX_OFFSET, 1);
        R600_OUT_BATCH(start);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
        R600_OUT_BATCH(vgt_num_indices);
        R600_OUT_BATCH(vgt_draw_initiator);
    }

    END_BATCH();
    COMMIT_BATCH();
}

/* start 3d, idle, cb/db flush */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)

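/* The per-primitive dword counts here must mirror total_emit in
 * r700RunRenderPrimitive: 17 for an indexed draw (including the two-dword
 * index-buffer reloc), 13 for the AUTO_INDEX path.
 */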
static GLuint r700PredictRenderSize(GLcontext* ctx,
                                    const struct _mesa_prim *prim,
                                    const struct _mesa_index_buffer *ib,
                                    GLuint nr_prims)
{
    context_t *context = R700_CONTEXT(ctx);
    GLboolean flushed;
    GLuint dwords, i;
    GLuint state_size;

    dwords = PRE_EMIT_STATE_BUFSZ;
    if (ib)
        dwords += nr_prims * 17; /* was 14, which undercounts the 17-dword indexed emit */
    else {
        for (i = 0; i < nr_prims; ++i)
        {
            dwords += 13;
        }
    }

    state_size = radeonCountStateEmitSize(&context->radeon);
    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
                                       dwords + state_size,
                                       __FUNCTION__);
    if (flushed)
        dwords += radeonCountStateEmitSize(&context->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
    return dwords;
}

403
404 #define CONVERT( TYPE, MACRO ) do { \
405 GLuint i, j, sz; \
406 sz = input->Size; \
407 if (input->Normalized) { \
408 for (i = 0; i < count; i++) { \
409 const TYPE *in = (TYPE *)src_ptr; \
410 for (j = 0; j < sz; j++) { \
411 *dst_ptr++ = MACRO(*in); \
412 in++; \
413 } \
414 src_ptr += stride; \
415 } \
416 } else { \
417 for (i = 0; i < count; i++) { \
418 const TYPE *in = (TYPE *)src_ptr; \
419 for (j = 0; j < sz; j++) { \
420 *dst_ptr++ = (GLfloat)(*in); \
421 in++; \
422 } \
423 src_ptr += stride; \
424 } \
425 } \
426 } while (0)
427
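/* CONVERT expands in the caller's scope, picking up count, stride, src_ptr
 * and dst_ptr there.  CONVERT(GLshort, SHORT_TO_FLOAT), for instance, walks
 * count elements of input->Size shorts each and widens every component to
 * float; normalized types go through the scaling macro instead of a plain
 * cast.
 */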
/**
 * Convert attribute data of a given type to float.
 * If the attribute uses a named buffer object, replace the bo with a newly
 * allocated bo holding the converted data.
 */
static void r700ConvertAttrib(GLcontext *ctx, int count,
                              const struct gl_client_array *input,
                              struct StreamDesc *attr)
{
    context_t *context = R700_CONTEXT(ctx);
    const GLvoid *src_ptr;
    GLboolean mapped_named_bo = GL_FALSE;
    GLfloat *dst_ptr;
    GLuint stride;

    stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;

    /* Convert value for first element only */
    if (input->StrideB == 0)
    {
        count = 1;
    }

    if (input->BufferObj->Name)
    {
        if (!input->BufferObj->Pointer)
        {
            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
            mapped_named_bo = GL_TRUE;
        }

        src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
    }
    else
    {
        src_ptr = input->Ptr;
    }

    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
                         sizeof(GLfloat) * input->Size * count, 32);
    dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);

    assert(src_ptr != NULL);

    switch (input->Type)
    {
    case GL_DOUBLE:
        CONVERT(GLdouble, (GLfloat));
        break;
    case GL_UNSIGNED_INT:
        CONVERT(GLuint, UINT_TO_FLOAT);
        break;
    case GL_INT:
        CONVERT(GLint, INT_TO_FLOAT);
        break;
    case GL_UNSIGNED_SHORT:
        CONVERT(GLushort, USHORT_TO_FLOAT);
        break;
    case GL_SHORT:
        CONVERT(GLshort, SHORT_TO_FLOAT);
        break;
    case GL_UNSIGNED_BYTE:
        assert(input->Format != GL_BGRA);
        CONVERT(GLubyte, UBYTE_TO_FLOAT);
        break;
    case GL_BYTE:
        CONVERT(GLbyte, BYTE_TO_FLOAT);
        break;
    default:
        assert(0);
        break;
    }

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
    }
}

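/* Copy a client array out at a stride rounded up to the next multiple of
 * four bytes, e.g. a 6-byte stride becomes (6 + 3) & ~3 = 8.  The
 * stride % 4 check in r700SetupStreams suggests the vertex fetcher
 * requires dword-aligned strides.
 */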
static void r700AlignDataToDword(GLcontext *ctx,
                                 const struct gl_client_array *input,
                                 int count,
                                 struct StreamDesc *attr)
{
    context_t *context = R700_CONTEXT(ctx);
    const int dst_stride = (input->StrideB + 3) & ~3;
    const int size = getTypeSize(input->Type) * input->Size * count;
    GLboolean mapped_named_bo = GL_FALSE;

    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);

    if (!input->BufferObj->Pointer)
    {
        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
        mapped_named_bo = GL_TRUE;
    }

    {
        GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
        GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
        int i;

        for (i = 0; i < count; ++i)
        {
            _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
            src_ptr += input->StrideB;
            dst_ptr += dst_stride;
        }
    }

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
    }

    attr->stride = dst_stride;
}

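/* Bind each active vertex array to an aos slot.  Three cases:
 *  - types the fetcher cannot read directly (doubles, (u)ints, sub-dword
 *    strides, non-native endianness) are converted to floats in a DMA
 *    region;
 *  - named buffer objects with dword-aligned strides are referenced in
 *    place;
 *  - plain user arrays are copied into a fresh DMA region via
 *    radeonEmitVec{4,8,12,16}.
 */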
static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
{
    context_t *context = R700_CONTEXT(ctx);
    GLuint stride;
    int ret;
    int i, index;

    R600_STATECHANGE(context, vtx);

    for (index = 0; index < context->nNumActiveAos; index++)
    {
        struct radeon_aos *aos = &context->radeon.tcl.aos[index];
        i = context->stream_desc[index].element;

        stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;

        if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
#if MESA_BIG_ENDIAN
            getTypeSize(input[i]->Type) != 4 ||
#endif
            stride < 4)
        {
            r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
        }
        else
        {
            if (input[i]->BufferObj->Name)
            {
                if (stride % 4 != 0)
                {
                    assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
                    r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
                    context->stream_desc[index].is_named_bo = GL_FALSE;
                }
                else
                {
                    context->stream_desc[index].stride = input[i]->StrideB;
                    context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
                    context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
                    context->stream_desc[index].is_named_bo = GL_TRUE;
                }
            }
            else
            {
                int size;
                int local_count = count;
                uint32_t *dst;

                if (input[i]->StrideB == 0)
                {
                    size = getTypeSize(input[i]->Type) * input[i]->Size;
                    local_count = 1;
                }
                else
                {
                    size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
                }

                radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
                                     &context->stream_desc[index].bo_offset, size, 32);
                assert(context->stream_desc[index].bo->ptr != NULL);
                dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
                                               context->stream_desc[index].bo_offset);

                switch (context->stream_desc[index].dwords)
                {
                case 1:
                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    break;
                case 2:
                    radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    break;
                case 3:
                    radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    break;
                case 4:
                    radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
                    break;
                default:
                    assert(0);
                    break;
                }
            }
        }

        aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
        aos->stride = context->stream_desc[index].stride / sizeof(float);
        aos->components = context->stream_desc[index].dwords;
        aos->bo = context->stream_desc[index].bo;
        aos->offset = context->stream_desc[index].bo_offset;

        if (context->stream_desc[index].is_named_bo)
        {
            radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
                                              context->stream_desc[index].bo,
                                              RADEON_GEM_DOMAIN_GTT, 0);
        }
    }

    ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
                                        first_elem(&context->radeon.dma.reserved)->bo,
                                        RADEON_GEM_DOMAIN_GTT, 0);
}

static void r700FreeData(GLcontext *ctx)
{
    /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
     * to prevent double unref in radeonReleaseArrays
     * called during context destroy
     */
    context_t *context = R700_CONTEXT(ctx);

    int i;

    for (i = 0; i < context->nNumActiveAos; i++)
    {
        if (!context->stream_desc[i].is_named_bo)
        {
            radeon_bo_unref(context->stream_desc[i].bo);
        }
        context->radeon.tcl.aos[i].bo = NULL;
    }

    if (context->ind_buf.bo != NULL)
    {
        radeon_bo_unref(context->ind_buf.bo);
    }
}

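/* GL_UNSIGNED_BYTE indices (and, on big-endian hosts, GL_UNSIGNED_SHORT
 * ones) are repacked two 16-bit indices per dword: e.g. the byte indices
 * {0, 1, 2} become 0x00010000 (indices 0 and 1) followed by 0x00000002
 * for the odd leftover.
 */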
static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
    context_t *context = R700_CONTEXT(ctx);
    GLvoid *src_ptr;
    GLuint *out;
    int i;
    GLboolean mapped_named_bo = GL_FALSE;

    if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
    {
        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
        mapped_named_bo = GL_TRUE;
        assert(mesa_ind_buf->obj->Pointer != NULL);
    }
    src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);

    if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
    {
        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
        GLubyte *in = (GLubyte *)src_ptr;

        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);

        assert(context->ind_buf.bo->ptr != NULL);
        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);

        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
        {
            *out++ = in[i] | in[i + 1] << 16;
        }

        if (i < mesa_ind_buf->count)
        {
            *out++ = in[i];
        }

#if MESA_BIG_ENDIAN
    }
    else
    { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
        GLushort *in = (GLushort *)src_ptr;
        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);

        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);

        assert(context->ind_buf.bo->ptr != NULL);
        out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);

        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
        {
            *out++ = in[i] | in[i + 1] << 16;
        }

        if (i < mesa_ind_buf->count)
        {
            *out++ = in[i];
        }
#endif
    }

    context->ind_buf.is_32bit = GL_FALSE;
    context->ind_buf.count = mesa_ind_buf->count;

    if (mapped_named_bo)
    {
        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
    }
}

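/* Stage the index buffer for this draw.  On little-endian hosts, 16- and
 * 32-bit indices are copied straight into a DMA region and only byte
 * indices need the repack above; on big-endian hosts only 32-bit indices
 * take the memcpy path, so shorts fall through to the fixup as well.
 */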
static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
    context_t *context = R700_CONTEXT(ctx);

    if (!mesa_ind_buf) {
        context->ind_buf.bo = NULL;
        return;
    }

#if MESA_BIG_ENDIAN
    if (mesa_ind_buf->type == GL_UNSIGNED_INT)
    {
#else
    if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
    {
#endif
        const GLvoid *src_ptr;
        GLvoid *dst_ptr;
        GLboolean mapped_named_bo = GL_FALSE;

        if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
        {
            ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
            assert(mesa_ind_buf->obj->Pointer != NULL);
            mapped_named_bo = GL_TRUE;
        }

        src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);

        const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);

        radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
                             &context->ind_buf.bo_offset, size, 4);
        assert(context->ind_buf.bo->ptr != NULL);
        dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);

        _mesa_memcpy(dst_ptr, src_ptr, size);

        context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
        context->ind_buf.count = mesa_ind_buf->count;

        if (mapped_named_bo)
        {
            ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
        }
    }
    else
    {
        r700FixupIndexBuffer(ctx, mesa_ind_buf);
    }
}

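/* One attempt at a hardware draw: update derived GL state, build the
 * vertex/fragment programs, validate BOs, predict the command-buffer
 * footprint, then emit state plus one draw per primitive and sync the
 * color/depth surfaces.  Returns GL_FALSE (so the caller can fall back to
 * software TnL) only if buffer validation fails.
 */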
static GLboolean r700TryDrawPrims(GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_prim *prim,
                                  GLuint nr_prims,
                                  const struct _mesa_index_buffer *ib,
                                  GLuint min_index,
                                  GLuint max_index )
{
    context_t *context = R700_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    GLuint i, id = 0;
    struct radeon_renderbuffer *rrb;

    if (ctx->NewState)
        _mesa_update_state( ctx );

    _tnl_UpdateFixedFunctionProgram(ctx);
    r700SetVertexFormat(ctx, arrays, max_index + 1);
    /* shaders need to be updated before buffers are validated */
    r700UpdateShaders(ctx);
    if (!r600ValidateBuffers(ctx))
        return GL_FALSE;

    /* always emit CB base to prevent
     * lock ups on some chips.
     */
    R600_STATECHANGE(context, cb_target);
    /* mark vtx as dirty since it changes per-draw */
    R600_STATECHANGE(context, vtx);

    r700SetScissor(context);
    r700SetupVertexProgram(ctx);
    r700SetupFragmentProgram(ctx);
    r600UpdateTextureState(ctx);

    GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
                    + context->radeon.cmdbuf.cs->cdw;

    r700SetupIndexBuffer(ctx, ib);
    r700SetupStreams(ctx, arrays, max_index + 1);

    radeonEmitState(radeon);

    radeon_debug_add_indent();
    for (i = 0; i < nr_prims; ++i)
    {
        r700RunRenderPrimitive(ctx,
                               prim[i].start,
                               prim[i].start + prim[i].count,
                               prim[i].mode);
    }
    radeon_debug_remove_indent();

    /* Flush render op cached for last several quads. */
    r700WaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    r700FreeData(ctx);

    if (emit_end < context->radeon.cmdbuf.cs->cdw)
    {
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
    }

    return GL_TRUE;
}

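/* vbo_rebase_prims rewrites the arrays (or index buffer) so that min_index
 * becomes zero and then re-enters r700DrawPrims; the hardware path
 * (r700TryDrawPrims) assumes zero-based arrays.
 */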
static void r700DrawPrims(GLcontext *ctx,
                          const struct gl_client_array *arrays[],
                          const struct _mesa_prim *prim,
                          GLuint nr_prims,
                          const struct _mesa_index_buffer *ib,
                          GLboolean index_bounds_valid,
                          GLuint min_index,
                          GLuint max_index)
{
    GLboolean retval = GL_FALSE;

    /* This check should get folded into just the places that
     * min/max index are really needed.
     */
    if (!index_bounds_valid) {
        vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
    }

    if (min_index) {
        vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
        return;
    }

    /* Make an attempt at drawing */
    retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);

    /* If that failed, run the TnL pipeline; it should take care of fallbacks */
    if (!retval)
        _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}

904
905 void r700InitDraw(GLcontext *ctx)
906 {
907 struct vbo_context *vbo = vbo_context(ctx);
908
909 /* to be enabled */
910 vbo->draw_prims = r700DrawPrims;
911 }
912
913