Merge branch 'i915-unification' of git+ssh://people.freedesktop.org/~anholt/mesa...
[mesa.git] / src / mesa / drivers / dri / i965 / brw_draw_upload.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdlib.h>
29
30 #include "glheader.h"
31 #include "context.h"
32 #include "state.h"
33 #include "api_validate.h"
34 #include "enums.h"
35
36 #include "brw_draw.h"
37 #include "brw_defines.h"
38 #include "brw_context.h"
39 #include "brw_aub.h"
40 #include "brw_state.h"
41 #include "brw_fallback.h"
42
43 #include "intel_ioctl.h"
44 #include "intel_batchbuffer.h"
45 #include "intel_buffer_objects.h"
46
47
48 struct brw_array_state {
49 union header_union header;
50
51 struct {
52 union {
53 struct {
54 GLuint pitch:11;
55 GLuint pad:15;
56 GLuint access_type:1;
57 GLuint vb_index:5;
58 } bits;
59 GLuint dword;
60 } vb0;
61
62 struct buffer *buffer;
63 GLuint offset;
64
65 GLuint max_index;
66 GLuint instance_data_step_rate;
67
68 } vb[BRW_VBP_MAX];
69 };
70
71
72 static struct buffer *array_buffer( const struct gl_client_array *array )
73 {
74 return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
75 }
76
77 static GLuint double_types[5] = {
78 0,
79 BRW_SURFACEFORMAT_R64_FLOAT,
80 BRW_SURFACEFORMAT_R64G64_FLOAT,
81 BRW_SURFACEFORMAT_R64G64B64_FLOAT,
82 BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
83 };
84
85 static GLuint float_types[5] = {
86 0,
87 BRW_SURFACEFORMAT_R32_FLOAT,
88 BRW_SURFACEFORMAT_R32G32_FLOAT,
89 BRW_SURFACEFORMAT_R32G32B32_FLOAT,
90 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
91 };
92
93 static GLuint uint_types_norm[5] = {
94 0,
95 BRW_SURFACEFORMAT_R32_UNORM,
96 BRW_SURFACEFORMAT_R32G32_UNORM,
97 BRW_SURFACEFORMAT_R32G32B32_UNORM,
98 BRW_SURFACEFORMAT_R32G32B32A32_UNORM
99 };
100
101 static GLuint uint_types_scale[5] = {
102 0,
103 BRW_SURFACEFORMAT_R32_USCALED,
104 BRW_SURFACEFORMAT_R32G32_USCALED,
105 BRW_SURFACEFORMAT_R32G32B32_USCALED,
106 BRW_SURFACEFORMAT_R32G32B32A32_USCALED
107 };
108
109 static GLuint int_types_norm[5] = {
110 0,
111 BRW_SURFACEFORMAT_R32_SNORM,
112 BRW_SURFACEFORMAT_R32G32_SNORM,
113 BRW_SURFACEFORMAT_R32G32B32_SNORM,
114 BRW_SURFACEFORMAT_R32G32B32A32_SNORM
115 };
116
117 static GLuint int_types_scale[5] = {
118 0,
119 BRW_SURFACEFORMAT_R32_SSCALED,
120 BRW_SURFACEFORMAT_R32G32_SSCALED,
121 BRW_SURFACEFORMAT_R32G32B32_SSCALED,
122 BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
123 };
124
125 static GLuint ushort_types_norm[5] = {
126 0,
127 BRW_SURFACEFORMAT_R16_UNORM,
128 BRW_SURFACEFORMAT_R16G16_UNORM,
129 BRW_SURFACEFORMAT_R16G16B16_UNORM,
130 BRW_SURFACEFORMAT_R16G16B16A16_UNORM
131 };
132
133 static GLuint ushort_types_scale[5] = {
134 0,
135 BRW_SURFACEFORMAT_R16_USCALED,
136 BRW_SURFACEFORMAT_R16G16_USCALED,
137 BRW_SURFACEFORMAT_R16G16B16_USCALED,
138 BRW_SURFACEFORMAT_R16G16B16A16_USCALED
139 };
140
141 static GLuint short_types_norm[5] = {
142 0,
143 BRW_SURFACEFORMAT_R16_SNORM,
144 BRW_SURFACEFORMAT_R16G16_SNORM,
145 BRW_SURFACEFORMAT_R16G16B16_SNORM,
146 BRW_SURFACEFORMAT_R16G16B16A16_SNORM
147 };
148
149 static GLuint short_types_scale[5] = {
150 0,
151 BRW_SURFACEFORMAT_R16_SSCALED,
152 BRW_SURFACEFORMAT_R16G16_SSCALED,
153 BRW_SURFACEFORMAT_R16G16B16_SSCALED,
154 BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
155 };
156
157 static GLuint ubyte_types_norm[5] = {
158 0,
159 BRW_SURFACEFORMAT_R8_UNORM,
160 BRW_SURFACEFORMAT_R8G8_UNORM,
161 BRW_SURFACEFORMAT_R8G8B8_UNORM,
162 BRW_SURFACEFORMAT_R8G8B8A8_UNORM
163 };
164
165 static GLuint ubyte_types_scale[5] = {
166 0,
167 BRW_SURFACEFORMAT_R8_USCALED,
168 BRW_SURFACEFORMAT_R8G8_USCALED,
169 BRW_SURFACEFORMAT_R8G8B8_USCALED,
170 BRW_SURFACEFORMAT_R8G8B8A8_USCALED
171 };
172
173 static GLuint byte_types_norm[5] = {
174 0,
175 BRW_SURFACEFORMAT_R8_SNORM,
176 BRW_SURFACEFORMAT_R8G8_SNORM,
177 BRW_SURFACEFORMAT_R8G8B8_SNORM,
178 BRW_SURFACEFORMAT_R8G8B8A8_SNORM
179 };
180
181 static GLuint byte_types_scale[5] = {
182 0,
183 BRW_SURFACEFORMAT_R8_SSCALED,
184 BRW_SURFACEFORMAT_R8G8_SSCALED,
185 BRW_SURFACEFORMAT_R8G8B8_SSCALED,
186 BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
187 };
188
189
190 static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
191 {
192 if (INTEL_DEBUG & DEBUG_VERTS)
193 _mesa_printf("type %s size %d normalized %d\n",
194 _mesa_lookup_enum_by_nr(type), size, normalized);
195
196 if (normalized) {
197 switch (type) {
198 case GL_DOUBLE: return double_types[size];
199 case GL_FLOAT: return float_types[size];
200 case GL_INT: return int_types_norm[size];
201 case GL_SHORT: return short_types_norm[size];
202 case GL_BYTE: return byte_types_norm[size];
203 case GL_UNSIGNED_INT: return uint_types_norm[size];
204 case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
205 case GL_UNSIGNED_BYTE: return ubyte_types_norm[size];
206 default: assert(0); return 0;
207 }
208 }
209 else {
210 switch (type) {
211 case GL_DOUBLE: return double_types[size];
212 case GL_FLOAT: return float_types[size];
213 case GL_INT: return int_types_scale[size];
214 case GL_SHORT: return short_types_scale[size];
215 case GL_BYTE: return byte_types_scale[size];
216 case GL_UNSIGNED_INT: return uint_types_scale[size];
217 case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
218 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
219 default: assert(0); return 0;
220 }
221 }
222 }
223
224
225 static GLuint get_size( GLenum type )
226 {
227 switch (type) {
228 case GL_DOUBLE: return sizeof(GLdouble);
229 case GL_FLOAT: return sizeof(GLfloat);
230 case GL_INT: return sizeof(GLint);
231 case GL_SHORT: return sizeof(GLshort);
232 case GL_BYTE: return sizeof(GLbyte);
233 case GL_UNSIGNED_INT: return sizeof(GLuint);
234 case GL_UNSIGNED_SHORT: return sizeof(GLushort);
235 case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
236 default: return 0;
237 }
238 }
239
240 static GLuint get_index_type(GLenum type)
241 {
242 switch (type) {
243 case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
244 case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
245 case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
246 default: assert(0); return 0;
247 }
248 }
249
250 static void copy_strided_array( GLubyte *dest,
251 const GLubyte *src,
252 GLuint size,
253 GLuint stride,
254 GLuint count )
255 {
256 if (size == stride)
257 do_memcpy(dest, src, count * size);
258 else {
259 GLuint i,j;
260
261 for (i = 0; i < count; i++) {
262 for (j = 0; j < size; j++)
263 *dest++ = *src++;
264 src += (stride - size);
265 }
266 }
267 }
268
269 static void wrap_buffers( struct brw_context *brw,
270 GLuint size )
271 {
272 GLcontext *ctx = &brw->intel.ctx;
273
274 if (size < BRW_UPLOAD_INIT_SIZE)
275 size = BRW_UPLOAD_INIT_SIZE;
276
277 brw->vb.upload.buf++;
278 brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS;
279 brw->vb.upload.offset = 0;
280
281 ctx->Driver.BufferData(ctx,
282 GL_ARRAY_BUFFER_ARB,
283 size,
284 NULL,
285 GL_DYNAMIC_DRAW_ARB,
286 brw->vb.upload.vbo[brw->vb.upload.buf]);
287 }
288
289 static void get_space( struct brw_context *brw,
290 GLuint size,
291 struct gl_buffer_object **vbo_return,
292 GLuint *offset_return )
293 {
294 size = (size + 63) & ~63;
295
296 if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE)
297 wrap_buffers(brw, size);
298
299 *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf];
300 *offset_return = brw->vb.upload.offset;
301
302 brw->vb.upload.offset += size;
303 }
304
305
306
307 static struct gl_client_array *
308 copy_array_to_vbo_array( struct brw_context *brw,
309 GLuint i,
310 const struct gl_client_array *array,
311 GLuint element_size,
312 GLuint count)
313 {
314 GLcontext *ctx = &brw->intel.ctx;
315 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
316 GLuint size = count * element_size;
317 struct gl_buffer_object *vbo;
318 GLuint offset;
319 GLuint new_stride;
320
321 get_space(brw, size, &vbo, &offset);
322
323 if (array->StrideB == 0) {
324 assert(count == 1);
325 new_stride = 0;
326 }
327 else
328 new_stride = element_size;
329
330 vbo_array->Size = array->Size;
331 vbo_array->Type = array->Type;
332 vbo_array->Stride = new_stride;
333 vbo_array->StrideB = new_stride;
334 vbo_array->Ptr = (const void *)offset;
335 vbo_array->Enabled = 1;
336 vbo_array->Normalized = array->Normalized;
337 vbo_array->_MaxElement = array->_MaxElement; /* ? */
338 vbo_array->BufferObj = vbo;
339
340 {
341 GLubyte *map = ctx->Driver.MapBuffer(ctx,
342 GL_ARRAY_BUFFER_ARB,
343 GL_DYNAMIC_DRAW_ARB,
344 vbo);
345
346 map += offset;
347
348 copy_strided_array( map,
349 array->Ptr,
350 element_size,
351 array->StrideB,
352 count);
353
354 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj);
355 }
356
357 return vbo_array;
358 }
359
360
361
362 static struct gl_client_array *
363 interleaved_vbo_array( struct brw_context *brw,
364 GLuint i,
365 const struct gl_client_array *uploaded_array,
366 const struct gl_client_array *array,
367 const char *ptr)
368 {
369 struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
370
371 vbo_array->Size = array->Size;
372 vbo_array->Type = array->Type;
373 vbo_array->Stride = array->Stride;
374 vbo_array->StrideB = array->StrideB;
375 vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr +
376 ((const char *)array->Ptr - ptr));
377 vbo_array->Enabled = 1;
378 vbo_array->Normalized = array->Normalized;
379 vbo_array->_MaxElement = array->_MaxElement;
380 vbo_array->BufferObj = uploaded_array->BufferObj;
381
382 return vbo_array;
383 }
384
385
386 GLboolean brw_upload_vertices( struct brw_context *brw,
387 GLuint min_index,
388 GLuint max_index )
389 {
390 GLcontext *ctx = &brw->intel.ctx;
391 struct intel_context *intel = intel_context(ctx);
392 GLuint tmp = brw->vs.prog_data->inputs_read;
393 struct brw_vertex_element_packet vep;
394 struct brw_array_state vbp;
395 GLuint i;
396 const void *ptr = NULL;
397 GLuint interleave = 0;
398
399 struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
400 GLuint nr_enabled = 0;
401
402 struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
403 GLuint nr_uploads = 0;
404
405
406 memset(&vbp, 0, sizeof(vbp));
407 memset(&vep, 0, sizeof(vep));
408
409 /* First build an array of pointers to ve's in vb.inputs_read
410 */
411 if (0)
412 _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
413
414 while (tmp) {
415 GLuint i = _mesa_ffsll(tmp)-1;
416 struct brw_vertex_element *input = &brw->vb.inputs[i];
417
418 tmp &= ~(1<<i);
419 enabled[nr_enabled++] = input;
420
421 input->index = i;
422 input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
423 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
424
425 if (!input->glarray->BufferObj->Name) {
426 if (i == 0) {
427 /* Position array not properly enabled:
428 */
429 if (input->glarray->StrideB == 0)
430 return GL_FALSE;
431
432 interleave = input->glarray->StrideB;
433 ptr = input->glarray->Ptr;
434 }
435 else if (interleave != input->glarray->StrideB ||
436 (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
437 (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
438 interleave = 0;
439 }
440
441 upload[nr_uploads++] = input;
442
443 /* We rebase drawing to start at element zero only when
444 * varyings are not in vbos, which means we can end up
445 * uploading non-varying arrays (stride != 0) when min_index
446 * is zero. This doesn't matter as the amount to upload is
447 * the same for these arrays whether the draw call is rebased
448 * or not - we just have to upload the one element.
449 */
450 assert(min_index == 0 || input->glarray->StrideB == 0);
451 }
452 }
453
454 /* Upload interleaved arrays if all uploads are interleaved
455 */
456 if (nr_uploads > 1 &&
457 interleave &&
458 interleave <= 256) {
459 struct brw_vertex_element *input0 = upload[0];
460
461 input0->glarray = copy_array_to_vbo_array(brw, 0,
462 input0->glarray,
463 interleave,
464 input0->count);
465
466 for (i = 1; i < nr_uploads; i++) {
467 upload[i]->glarray = interleaved_vbo_array(brw,
468 i,
469 input0->glarray,
470 upload[i]->glarray,
471 ptr);
472 }
473 }
474 else {
475 for (i = 0; i < nr_uploads; i++) {
476 struct brw_vertex_element *input = upload[i];
477
478 input->glarray = copy_array_to_vbo_array(brw, i,
479 input->glarray,
480 input->element_size,
481 input->count);
482
483 }
484 }
485
486 /* XXX: In the rare cases where this happens we fallback all
487 * the way to software rasterization, although a tnl fallback
488 * would be sufficient. I don't know of *any* real world
489 * cases with > 17 vertex attributes enabled, so it probably
490 * isn't an issue at this point.
491 */
492 if (nr_enabled >= BRW_VEP_MAX)
493 return GL_FALSE;
494
495 /* This still defines a hardware VB for each input, even if they
496 * are interleaved or from the same VBO. TBD if this makes a
497 * performance difference.
498 */
499 for (i = 0; i < nr_enabled; i++) {
500 struct brw_vertex_element *input = enabled[i];
501
502 input->vep = &vep.ve[i];
503 input->vep->ve0.src_format = get_surface_type(input->glarray->Type,
504 input->glarray->Size,
505 input->glarray->Normalized);
506 input->vep->ve0.valid = 1;
507 input->vep->ve1.dst_offset = (i) * 4;
508 input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
509 input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
510 input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
511 input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
512
513 switch (input->glarray->Size) {
514 case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
515 case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
516 case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
517 case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
518 break;
519 }
520
521 input->vep->ve0.vertex_buffer_index = i;
522 input->vep->ve0.src_offset = 0;
523
524 vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
525 vbp.vb[i].vb0.bits.pad = 0;
526 vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
527 vbp.vb[i].vb0.bits.vb_index = i;
528 vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
529 vbp.vb[i].buffer = array_buffer(input->glarray);
530 vbp.vb[i].max_index = max_index;
531 }
532
533
534
535 /* Now emit VB and VEP state packets:
536 */
537 vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
538 vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
539
540 BEGIN_BATCH(vbp.header.bits.length+2, 0);
541 OUT_BATCH( vbp.header.dword );
542
543 for (i = 0; i < nr_enabled; i++) {
544 OUT_BATCH( vbp.vb[i].vb0.dword );
545 OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset);
546 OUT_BATCH( vbp.vb[i].max_index );
547 OUT_BATCH( vbp.vb[i].instance_data_step_rate );
548 }
549 ADVANCE_BATCH();
550
551 vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
552 vep.header.opcode = CMD_VERTEX_ELEMENT;
553 brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
554
555 return GL_TRUE;
556 }
557
558
559 static GLuint element_size( GLenum type )
560 {
561 switch(type) {
562 case GL_UNSIGNED_INT: return 4;
563 case GL_UNSIGNED_SHORT: return 2;
564 case GL_UNSIGNED_BYTE: return 1;
565 default: assert(0); return 0;
566 }
567 }
568
569
570
571 void brw_upload_indices( struct brw_context *brw,
572 const struct _mesa_index_buffer *index_buffer )
573 {
574 GLcontext *ctx = &brw->intel.ctx;
575 struct intel_context *intel = &brw->intel;
576 GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
577 struct gl_buffer_object *bufferobj = index_buffer->obj;
578 GLuint offset = (GLuint)index_buffer->ptr;
579
580 /* Turn into a proper VBO:
581 */
582 if (!bufferobj->Name) {
583
584 /* Get new bufferobj, offset:
585 */
586 get_space(brw, ib_size, &bufferobj, &offset);
587
588 /* Straight upload
589 */
590 ctx->Driver.BufferSubData( ctx,
591 GL_ELEMENT_ARRAY_BUFFER_ARB,
592 offset,
593 ib_size,
594 index_buffer->ptr,
595 bufferobj);
596 }
597
598 /* Emit the indexbuffer packet:
599 */
600 {
601 struct brw_indexbuffer ib;
602 struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
603
604 memset(&ib, 0, sizeof(ib));
605
606 ib.header.bits.opcode = CMD_INDEX_BUFFER;
607 ib.header.bits.length = sizeof(ib)/4 - 2;
608 ib.header.bits.index_format = get_index_type(index_buffer->type);
609 ib.header.bits.cut_index_enable = 0;
610
611
612 BEGIN_BATCH(4, 0);
613 OUT_BATCH( ib.header.dword );
614 OUT_BATCH( bmBufferOffset(intel, buffer) + offset );
615 OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size );
616 OUT_BATCH( 0 );
617 ADVANCE_BATCH();
618 }
619 }