[965] Move to using shared texture management code.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_draw_upload.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdlib.h>
29
30 #include "glheader.h"
31 #include "context.h"
32 #include "state.h"
33 #include "api_validate.h"
34 #include "enums.h"
35
36 #include "brw_draw.h"
37 #include "brw_defines.h"
38 #include "brw_context.h"
39 #include "brw_state.h"
40 #include "brw_fallback.h"
41
42 #include "intel_ioctl.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_buffer_objects.h"
45
46
47 struct brw_array_state {
48 union header_union header;
49
50 struct {
51 union {
52 struct {
53 GLuint pitch:11;
54 GLuint pad:15;
55 GLuint access_type:1;
56 GLuint vb_index:5;
57 } bits;
58 GLuint dword;
59 } vb0;
60
61 dri_bo *buffer;
62 GLuint offset;
63
64 GLuint max_index;
65 GLuint instance_data_step_rate;
66
67 } vb[BRW_VBP_MAX];
68 };
69
70
71 static dri_bo *array_buffer( struct intel_context *intel,
72 const struct gl_client_array *array )
73 {
74 return intel_bufferobj_buffer(intel, intel_buffer_object(array->BufferObj),
75 INTEL_WRITE_PART);
76 }
77
78 static GLuint double_types[5] = {
79 0,
80 BRW_SURFACEFORMAT_R64_FLOAT,
81 BRW_SURFACEFORMAT_R64G64_FLOAT,
82 BRW_SURFACEFORMAT_R64G64B64_FLOAT,
83 BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
84 };
85
86 static GLuint float_types[5] = {
87 0,
88 BRW_SURFACEFORMAT_R32_FLOAT,
89 BRW_SURFACEFORMAT_R32G32_FLOAT,
90 BRW_SURFACEFORMAT_R32G32B32_FLOAT,
91 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
92 };
93
94 static GLuint uint_types_norm[5] = {
95 0,
96 BRW_SURFACEFORMAT_R32_UNORM,
97 BRW_SURFACEFORMAT_R32G32_UNORM,
98 BRW_SURFACEFORMAT_R32G32B32_UNORM,
99 BRW_SURFACEFORMAT_R32G32B32A32_UNORM
100 };
101
102 static GLuint uint_types_scale[5] = {
103 0,
104 BRW_SURFACEFORMAT_R32_USCALED,
105 BRW_SURFACEFORMAT_R32G32_USCALED,
106 BRW_SURFACEFORMAT_R32G32B32_USCALED,
107 BRW_SURFACEFORMAT_R32G32B32A32_USCALED
108 };
109
110 static GLuint int_types_norm[5] = {
111 0,
112 BRW_SURFACEFORMAT_R32_SNORM,
113 BRW_SURFACEFORMAT_R32G32_SNORM,
114 BRW_SURFACEFORMAT_R32G32B32_SNORM,
115 BRW_SURFACEFORMAT_R32G32B32A32_SNORM
116 };
117
118 static GLuint int_types_scale[5] = {
119 0,
120 BRW_SURFACEFORMAT_R32_SSCALED,
121 BRW_SURFACEFORMAT_R32G32_SSCALED,
122 BRW_SURFACEFORMAT_R32G32B32_SSCALED,
123 BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
124 };
125
126 static GLuint ushort_types_norm[5] = {
127 0,
128 BRW_SURFACEFORMAT_R16_UNORM,
129 BRW_SURFACEFORMAT_R16G16_UNORM,
130 BRW_SURFACEFORMAT_R16G16B16_UNORM,
131 BRW_SURFACEFORMAT_R16G16B16A16_UNORM
132 };
133
134 static GLuint ushort_types_scale[5] = {
135 0,
136 BRW_SURFACEFORMAT_R16_USCALED,
137 BRW_SURFACEFORMAT_R16G16_USCALED,
138 BRW_SURFACEFORMAT_R16G16B16_USCALED,
139 BRW_SURFACEFORMAT_R16G16B16A16_USCALED
140 };
141
142 static GLuint short_types_norm[5] = {
143 0,
144 BRW_SURFACEFORMAT_R16_SNORM,
145 BRW_SURFACEFORMAT_R16G16_SNORM,
146 BRW_SURFACEFORMAT_R16G16B16_SNORM,
147 BRW_SURFACEFORMAT_R16G16B16A16_SNORM
148 };
149
150 static GLuint short_types_scale[5] = {
151 0,
152 BRW_SURFACEFORMAT_R16_SSCALED,
153 BRW_SURFACEFORMAT_R16G16_SSCALED,
154 BRW_SURFACEFORMAT_R16G16B16_SSCALED,
155 BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
156 };
157
158 static GLuint ubyte_types_norm[5] = {
159 0,
160 BRW_SURFACEFORMAT_R8_UNORM,
161 BRW_SURFACEFORMAT_R8G8_UNORM,
162 BRW_SURFACEFORMAT_R8G8B8_UNORM,
163 BRW_SURFACEFORMAT_R8G8B8A8_UNORM
164 };
165
166 static GLuint ubyte_types_scale[5] = {
167 0,
168 BRW_SURFACEFORMAT_R8_USCALED,
169 BRW_SURFACEFORMAT_R8G8_USCALED,
170 BRW_SURFACEFORMAT_R8G8B8_USCALED,
171 BRW_SURFACEFORMAT_R8G8B8A8_USCALED
172 };
173
174 static GLuint byte_types_norm[5] = {
175 0,
176 BRW_SURFACEFORMAT_R8_SNORM,
177 BRW_SURFACEFORMAT_R8G8_SNORM,
178 BRW_SURFACEFORMAT_R8G8B8_SNORM,
179 BRW_SURFACEFORMAT_R8G8B8A8_SNORM
180 };
181
182 static GLuint byte_types_scale[5] = {
183 0,
184 BRW_SURFACEFORMAT_R8_SSCALED,
185 BRW_SURFACEFORMAT_R8G8_SSCALED,
186 BRW_SURFACEFORMAT_R8G8B8_SSCALED,
187 BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
188 };
189
190
191 static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
192 {
193 if (INTEL_DEBUG & DEBUG_VERTS)
194 _mesa_printf("type %s size %d normalized %d\n",
195 _mesa_lookup_enum_by_nr(type), size, normalized);
196
197 if (normalized) {
198 switch (type) {
199 case GL_DOUBLE: return double_types[size];
200 case GL_FLOAT: return float_types[size];
201 case GL_INT: return int_types_norm[size];
202 case GL_SHORT: return short_types_norm[size];
203 case GL_BYTE: return byte_types_norm[size];
204 case GL_UNSIGNED_INT: return uint_types_norm[size];
205 case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
206 case GL_UNSIGNED_BYTE: return ubyte_types_norm[size];
207 default: assert(0); return 0;
208 }
209 }
210 else {
211 switch (type) {
212 case GL_DOUBLE: return double_types[size];
213 case GL_FLOAT: return float_types[size];
214 case GL_INT: return int_types_scale[size];
215 case GL_SHORT: return short_types_scale[size];
216 case GL_BYTE: return byte_types_scale[size];
217 case GL_UNSIGNED_INT: return uint_types_scale[size];
218 case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
219 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
220 default: assert(0); return 0;
221 }
222 }
223 }
224
225
226 static GLuint get_size( GLenum type )
227 {
228 switch (type) {
229 case GL_DOUBLE: return sizeof(GLdouble);
230 case GL_FLOAT: return sizeof(GLfloat);
231 case GL_INT: return sizeof(GLint);
232 case GL_SHORT: return sizeof(GLshort);
233 case GL_BYTE: return sizeof(GLbyte);
234 case GL_UNSIGNED_INT: return sizeof(GLuint);
235 case GL_UNSIGNED_SHORT: return sizeof(GLushort);
236 case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
237 default: return 0;
238 }
239 }
240
241 static GLuint get_index_type(GLenum type)
242 {
243 switch (type) {
244 case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
245 case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
246 case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
247 default: assert(0); return 0;
248 }
249 }
250
251 static void copy_strided_array( GLubyte *dest,
252 const GLubyte *src,
253 GLuint size,
254 GLuint stride,
255 GLuint count )
256 {
257 if (size == stride)
258 memcpy(dest, src, count * size);
259 else {
260 GLuint i,j;
261
262 for (i = 0; i < count; i++) {
263 for (j = 0; j < size; j++)
264 *dest++ = *src++;
265 src += (stride - size);
266 }
267 }
268 }
269
270 static void wrap_buffers( struct brw_context *brw,
271 GLuint size )
272 {
273 GLcontext *ctx = &brw->intel.ctx;
274
275 if (size < BRW_UPLOAD_INIT_SIZE)
276 size = BRW_UPLOAD_INIT_SIZE;
277
278 brw->vb.upload.buf++;
279 brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS;
280 brw->vb.upload.offset = 0;
281
282 ctx->Driver.BufferData(ctx,
283 GL_ARRAY_BUFFER_ARB,
284 size,
285 NULL,
286 GL_DYNAMIC_DRAW_ARB,
287 brw->vb.upload.vbo[brw->vb.upload.buf]);
288 }
289
290 static void get_space( struct brw_context *brw,
291 GLuint size,
292 struct gl_buffer_object **vbo_return,
293 GLuint *offset_return )
294 {
295 size = ALIGN(size, 64);
296
297 if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE)
298 wrap_buffers(brw, size);
299
300 *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf];
301 *offset_return = brw->vb.upload.offset;
302
303 brw->vb.upload.offset += size;
304 }
305
306
307
308 static struct gl_client_array *
309 copy_array_to_vbo_array( struct brw_context *brw,
310 GLuint i,
311 const struct gl_client_array *array,
312 GLuint element_size,
313 GLuint count)
314 {
315 GLcontext *ctx = &brw->intel.ctx;
316 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
317 GLuint size = count * element_size;
318 struct gl_buffer_object *vbo;
319 GLuint offset;
320 GLuint new_stride;
321
322 get_space(brw, size, &vbo, &offset);
323
324 if (array->StrideB == 0) {
325 assert(count == 1);
326 new_stride = 0;
327 }
328 else
329 new_stride = element_size;
330
331 vbo_array->Size = array->Size;
332 vbo_array->Type = array->Type;
333 vbo_array->Stride = new_stride;
334 vbo_array->StrideB = new_stride;
335 vbo_array->Ptr = (const void *)offset;
336 vbo_array->Enabled = 1;
337 vbo_array->Normalized = array->Normalized;
338 vbo_array->_MaxElement = array->_MaxElement; /* ? */
339 vbo_array->BufferObj = vbo;
340
341 {
342 GLubyte *map = ctx->Driver.MapBuffer(ctx,
343 GL_ARRAY_BUFFER_ARB,
344 GL_DYNAMIC_DRAW_ARB,
345 vbo);
346
347 map += offset;
348
349 copy_strided_array( map,
350 array->Ptr,
351 element_size,
352 array->StrideB,
353 count);
354
355 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj);
356 }
357
358 return vbo_array;
359 }
360
361
362
363 static struct gl_client_array *
364 interleaved_vbo_array( struct brw_context *brw,
365 GLuint i,
366 const struct gl_client_array *uploaded_array,
367 const struct gl_client_array *array,
368 const char *ptr)
369 {
370 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
371
372 vbo_array->Size = array->Size;
373 vbo_array->Type = array->Type;
374 vbo_array->Stride = array->Stride;
375 vbo_array->StrideB = array->StrideB;
376 vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr +
377 ((const char *)array->Ptr - ptr));
378 vbo_array->Enabled = 1;
379 vbo_array->Normalized = array->Normalized;
380 vbo_array->_MaxElement = array->_MaxElement;
381 vbo_array->BufferObj = uploaded_array->BufferObj;
382
383 return vbo_array;
384 }
385
386
387 GLboolean brw_upload_vertices( struct brw_context *brw,
388 GLuint min_index,
389 GLuint max_index )
390 {
391 GLcontext *ctx = &brw->intel.ctx;
392 struct intel_context *intel = intel_context(ctx);
393 GLuint tmp = brw->vs.prog_data->inputs_read;
394 struct brw_vertex_element_packet vep;
395 struct brw_array_state vbp;
396 GLuint i;
397 const void *ptr = NULL;
398 GLuint interleave = 0;
399
400 struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
401 GLuint nr_enabled = 0;
402
403 struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
404 GLuint nr_uploads = 0;
405
406
407 memset(&vbp, 0, sizeof(vbp));
408 memset(&vep, 0, sizeof(vep));
409
410 /* First build an array of pointers to ve's in vb.inputs_read
411 */
412 if (0)
413 _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
414
415 while (tmp) {
416 GLuint i = _mesa_ffsll(tmp)-1;
417 struct brw_vertex_element *input = &brw->vb.inputs[i];
418
419 tmp &= ~(1<<i);
420 enabled[nr_enabled++] = input;
421
422 input->index = i;
423 input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
424 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
425
426 if (!input->glarray->BufferObj->Name) {
427 if (i == 0) {
428 /* Position array not properly enabled:
429 */
430 if (input->glarray->StrideB == 0)
431 return GL_FALSE;
432
433 interleave = input->glarray->StrideB;
434 ptr = input->glarray->Ptr;
435 }
436 else if (interleave != input->glarray->StrideB ||
437 (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
438 (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
439 interleave = 0;
440 }
441
442 upload[nr_uploads++] = input;
443
444 /* We rebase drawing to start at element zero only when
445 * varyings are not in vbos, which means we can end up
446 * uploading non-varying arrays (stride != 0) when min_index
447 * is zero. This doesn't matter as the amount to upload is
448 * the same for these arrays whether the draw call is rebased
449 * or not - we just have to upload the one element.
450 */
451 assert(min_index == 0 || input->glarray->StrideB == 0);
452 }
453 }
454
455 /* Upload interleaved arrays if all uploads are interleaved
456 */
457 if (nr_uploads > 1 &&
458 interleave &&
459 interleave <= 256) {
460 struct brw_vertex_element *input0 = upload[0];
461
462 input0->glarray = copy_array_to_vbo_array(brw, 0,
463 input0->glarray,
464 interleave,
465 input0->count);
466
467 for (i = 1; i < nr_uploads; i++) {
468 upload[i]->glarray = interleaved_vbo_array(brw,
469 i,
470 input0->glarray,
471 upload[i]->glarray,
472 ptr);
473 }
474 }
475 else {
476 for (i = 0; i < nr_uploads; i++) {
477 struct brw_vertex_element *input = upload[i];
478
479 input->glarray = copy_array_to_vbo_array(brw, i,
480 input->glarray,
481 input->element_size,
482 input->count);
483
484 }
485 }
486
487 /* XXX: In the rare cases where this happens we fallback all
488 * the way to software rasterization, although a tnl fallback
489 * would be sufficient. I don't know of *any* real world
490 * cases with > 17 vertex attributes enabled, so it probably
491 * isn't an issue at this point.
492 */
493 if (nr_enabled >= BRW_VEP_MAX)
494 return GL_FALSE;
495
496 /* This still defines a hardware VB for each input, even if they
497 * are interleaved or from the same VBO. TBD if this makes a
498 * performance difference.
499 */
500 for (i = 0; i < nr_enabled; i++) {
501 struct brw_vertex_element *input = enabled[i];
502
503 input->vep = &vep.ve[i];
504 input->vep->ve0.src_format = get_surface_type(input->glarray->Type,
505 input->glarray->Size,
506 input->glarray->Normalized);
507 input->vep->ve0.valid = 1;
508 input->vep->ve1.dst_offset = (i) * 4;
509 input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
510 input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
511 input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
512 input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
513
514 switch (input->glarray->Size) {
515 case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
516 case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
517 case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
518 case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
519 break;
520 }
521
522 input->vep->ve0.vertex_buffer_index = i;
523 input->vep->ve0.src_offset = 0;
524
525 vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
526 vbp.vb[i].vb0.bits.pad = 0;
527 vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
528 vbp.vb[i].vb0.bits.vb_index = i;
529 vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
530 vbp.vb[i].buffer = array_buffer(intel, input->glarray);
531 vbp.vb[i].max_index = max_index;
532 }
533
534
535
536 /* Now emit VB and VEP state packets:
537 */
538 vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
539 vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
540
541 BEGIN_BATCH(vbp.header.bits.length+2, 0);
542 OUT_BATCH( vbp.header.dword );
543
544 for (i = 0; i < nr_enabled; i++) {
545 OUT_BATCH( vbp.vb[i].vb0.dword );
546 OUT_RELOC( vbp.vb[i].buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
547 vbp.vb[i].offset);
548 OUT_BATCH( vbp.vb[i].max_index );
549 OUT_BATCH( vbp.vb[i].instance_data_step_rate );
550 }
551 ADVANCE_BATCH();
552
553 vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
554 vep.header.opcode = CMD_VERTEX_ELEMENT;
555 brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
556
557 return GL_TRUE;
558 }
559
560 void brw_upload_indices( struct brw_context *brw,
561 const struct _mesa_index_buffer *index_buffer )
562 {
563 GLcontext *ctx = &brw->intel.ctx;
564 struct intel_context *intel = &brw->intel;
565 GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
566 struct gl_buffer_object *bufferobj = index_buffer->obj;
567 GLuint offset = (GLuint)index_buffer->ptr;
568
569 /* Turn into a proper VBO:
570 */
571 if (!bufferobj->Name) {
572
573 /* Get new bufferobj, offset:
574 */
575 get_space(brw, ib_size, &bufferobj, &offset);
576
577 /* Straight upload
578 */
579 ctx->Driver.BufferSubData( ctx,
580 GL_ELEMENT_ARRAY_BUFFER_ARB,
581 offset,
582 ib_size,
583 index_buffer->ptr,
584 bufferobj);
585 } else {
586 if (((1 << get_index_type(index_buffer->type)) - 1) & offset) {
587 struct gl_buffer_object *vbo;
588 GLuint voffset;
589 GLubyte *map = ctx->Driver.MapBuffer(ctx,
590 GL_ELEMENT_ARRAY_BUFFER_ARB,
591 GL_DYNAMIC_DRAW_ARB,
592 bufferobj);
593 map += offset;
594 get_space(brw, ib_size, &vbo, &voffset);
595
596 ctx->Driver.BufferSubData(ctx,
597 GL_ELEMENT_ARRAY_BUFFER_ARB,
598 voffset,
599 ib_size,
600 map,
601 vbo);
602 ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
603
604 bufferobj = vbo;
605 offset = voffset;
606 }
607 }
608
609 /* Emit the indexbuffer packet:
610 */
611 {
612 struct brw_indexbuffer ib;
613 dri_bo *buffer = intel_bufferobj_buffer(intel,
614 intel_buffer_object(bufferobj),
615 INTEL_READ);
616
617 memset(&ib, 0, sizeof(ib));
618
619 ib.header.bits.opcode = CMD_INDEX_BUFFER;
620 ib.header.bits.length = sizeof(ib)/4 - 2;
621 ib.header.bits.index_format = get_index_type(index_buffer->type);
622 ib.header.bits.cut_index_enable = 0;
623
624
625 BEGIN_BATCH(4, 0);
626 OUT_BATCH( ib.header.dword );
627 OUT_RELOC( buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset);
628 OUT_RELOC( buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
629 offset + ib_size);
630 OUT_BATCH( 0 );
631 ADVANCE_BATCH();
632 }
633 }