nir/i965: assert first is always less than 64
src/mesa/drivers/dri/i965/brw_draw_upload.c
1 /*
2 * Copyright 2003 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 #include "main/bufferobj.h"
27 #include "main/context.h"
28 #include "main/enums.h"
29 #include "main/macros.h"
30 #include "main/glformats.h"
31
32 #include "brw_draw.h"
33 #include "brw_defines.h"
34 #include "brw_context.h"
35 #include "brw_state.h"
36
37 #include "intel_batchbuffer.h"
38 #include "intel_buffer_objects.h"
39
40 static const GLuint double_types_float[5] = {
41 0,
42 BRW_SURFACEFORMAT_R64_FLOAT,
43 BRW_SURFACEFORMAT_R64G64_FLOAT,
44 BRW_SURFACEFORMAT_R64G64B64_FLOAT,
45 BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
46 };
47
48 static const GLuint double_types_passthru[5] = {
49 0,
50 BRW_SURFACEFORMAT_R64_PASSTHRU,
51 BRW_SURFACEFORMAT_R64G64_PASSTHRU,
52 BRW_SURFACEFORMAT_R64G64B64_PASSTHRU,
53 BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU
54 };
55
56 static const GLuint float_types[5] = {
57 0,
58 BRW_SURFACEFORMAT_R32_FLOAT,
59 BRW_SURFACEFORMAT_R32G32_FLOAT,
60 BRW_SURFACEFORMAT_R32G32B32_FLOAT,
61 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
62 };
63
64 static const GLuint half_float_types[5] = {
65 0,
66 BRW_SURFACEFORMAT_R16_FLOAT,
67 BRW_SURFACEFORMAT_R16G16_FLOAT,
68 BRW_SURFACEFORMAT_R16G16B16_FLOAT,
69 BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
70 };
71
72 static const GLuint fixed_point_types[5] = {
73 0,
74 BRW_SURFACEFORMAT_R32_SFIXED,
75 BRW_SURFACEFORMAT_R32G32_SFIXED,
76 BRW_SURFACEFORMAT_R32G32B32_SFIXED,
77 BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
78 };
79
80 static const GLuint uint_types_direct[5] = {
81 0,
82 BRW_SURFACEFORMAT_R32_UINT,
83 BRW_SURFACEFORMAT_R32G32_UINT,
84 BRW_SURFACEFORMAT_R32G32B32_UINT,
85 BRW_SURFACEFORMAT_R32G32B32A32_UINT
86 };
87
88 static const GLuint uint_types_norm[5] = {
89 0,
90 BRW_SURFACEFORMAT_R32_UNORM,
91 BRW_SURFACEFORMAT_R32G32_UNORM,
92 BRW_SURFACEFORMAT_R32G32B32_UNORM,
93 BRW_SURFACEFORMAT_R32G32B32A32_UNORM
94 };
95
96 static const GLuint uint_types_scale[5] = {
97 0,
98 BRW_SURFACEFORMAT_R32_USCALED,
99 BRW_SURFACEFORMAT_R32G32_USCALED,
100 BRW_SURFACEFORMAT_R32G32B32_USCALED,
101 BRW_SURFACEFORMAT_R32G32B32A32_USCALED
102 };
103
104 static const GLuint int_types_direct[5] = {
105 0,
106 BRW_SURFACEFORMAT_R32_SINT,
107 BRW_SURFACEFORMAT_R32G32_SINT,
108 BRW_SURFACEFORMAT_R32G32B32_SINT,
109 BRW_SURFACEFORMAT_R32G32B32A32_SINT
110 };
111
112 static const GLuint int_types_norm[5] = {
113 0,
114 BRW_SURFACEFORMAT_R32_SNORM,
115 BRW_SURFACEFORMAT_R32G32_SNORM,
116 BRW_SURFACEFORMAT_R32G32B32_SNORM,
117 BRW_SURFACEFORMAT_R32G32B32A32_SNORM
118 };
119
120 static const GLuint int_types_scale[5] = {
121 0,
122 BRW_SURFACEFORMAT_R32_SSCALED,
123 BRW_SURFACEFORMAT_R32G32_SSCALED,
124 BRW_SURFACEFORMAT_R32G32B32_SSCALED,
125 BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
126 };
127
128 static const GLuint ushort_types_direct[5] = {
129 0,
130 BRW_SURFACEFORMAT_R16_UINT,
131 BRW_SURFACEFORMAT_R16G16_UINT,
132 BRW_SURFACEFORMAT_R16G16B16_UINT,
133 BRW_SURFACEFORMAT_R16G16B16A16_UINT
134 };
135
136 static const GLuint ushort_types_norm[5] = {
137 0,
138 BRW_SURFACEFORMAT_R16_UNORM,
139 BRW_SURFACEFORMAT_R16G16_UNORM,
140 BRW_SURFACEFORMAT_R16G16B16_UNORM,
141 BRW_SURFACEFORMAT_R16G16B16A16_UNORM
142 };
143
144 static const GLuint ushort_types_scale[5] = {
145 0,
146 BRW_SURFACEFORMAT_R16_USCALED,
147 BRW_SURFACEFORMAT_R16G16_USCALED,
148 BRW_SURFACEFORMAT_R16G16B16_USCALED,
149 BRW_SURFACEFORMAT_R16G16B16A16_USCALED
150 };
151
152 static const GLuint short_types_direct[5] = {
153 0,
154 BRW_SURFACEFORMAT_R16_SINT,
155 BRW_SURFACEFORMAT_R16G16_SINT,
156 BRW_SURFACEFORMAT_R16G16B16_SINT,
157 BRW_SURFACEFORMAT_R16G16B16A16_SINT
158 };
159
160 static const GLuint short_types_norm[5] = {
161 0,
162 BRW_SURFACEFORMAT_R16_SNORM,
163 BRW_SURFACEFORMAT_R16G16_SNORM,
164 BRW_SURFACEFORMAT_R16G16B16_SNORM,
165 BRW_SURFACEFORMAT_R16G16B16A16_SNORM
166 };
167
168 static const GLuint short_types_scale[5] = {
169 0,
170 BRW_SURFACEFORMAT_R16_SSCALED,
171 BRW_SURFACEFORMAT_R16G16_SSCALED,
172 BRW_SURFACEFORMAT_R16G16B16_SSCALED,
173 BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
174 };
175
176 static const GLuint ubyte_types_direct[5] = {
177 0,
178 BRW_SURFACEFORMAT_R8_UINT,
179 BRW_SURFACEFORMAT_R8G8_UINT,
180 BRW_SURFACEFORMAT_R8G8B8_UINT,
181 BRW_SURFACEFORMAT_R8G8B8A8_UINT
182 };
183
184 static const GLuint ubyte_types_norm[5] = {
185 0,
186 BRW_SURFACEFORMAT_R8_UNORM,
187 BRW_SURFACEFORMAT_R8G8_UNORM,
188 BRW_SURFACEFORMAT_R8G8B8_UNORM,
189 BRW_SURFACEFORMAT_R8G8B8A8_UNORM
190 };
191
192 static const GLuint ubyte_types_scale[5] = {
193 0,
194 BRW_SURFACEFORMAT_R8_USCALED,
195 BRW_SURFACEFORMAT_R8G8_USCALED,
196 BRW_SURFACEFORMAT_R8G8B8_USCALED,
197 BRW_SURFACEFORMAT_R8G8B8A8_USCALED
198 };
199
200 static const GLuint byte_types_direct[5] = {
201 0,
202 BRW_SURFACEFORMAT_R8_SINT,
203 BRW_SURFACEFORMAT_R8G8_SINT,
204 BRW_SURFACEFORMAT_R8G8B8_SINT,
205 BRW_SURFACEFORMAT_R8G8B8A8_SINT
206 };
207
208 static const GLuint byte_types_norm[5] = {
209 0,
210 BRW_SURFACEFORMAT_R8_SNORM,
211 BRW_SURFACEFORMAT_R8G8_SNORM,
212 BRW_SURFACEFORMAT_R8G8B8_SNORM,
213 BRW_SURFACEFORMAT_R8G8B8A8_SNORM
214 };
215
216 static const GLuint byte_types_scale[5] = {
217 0,
218 BRW_SURFACEFORMAT_R8_SSCALED,
219 BRW_SURFACEFORMAT_R8G8_SSCALED,
220 BRW_SURFACEFORMAT_R8G8B8_SSCALED,
221 BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
222 };
223
224 static GLuint
225 double_types(struct brw_context *brw,
226 int size,
227 GLboolean doubles)
228 {
229 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
230 * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
231 * 64-bit components are stored in the URB without any conversion."
232 * Also included in the BDW PRM, Volume 7, page 470, table "Source Element
233 * Formats Supported in VF Unit".
234 *
235 * Previous PRMs don't include those references, so on gen7 we can't use the
236 * PASSTHRU formats directly. Even so, we prefer to return passthru here,
237 * because it reflects what we want to achieve, even though we need to work
238 * around it on gen < 8.
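 *
 * For example, a dvec3 attribute (size == 3) yields
 * BRW_SURFACEFORMAT_R64G64B64_PASSTHRU when doubles is set and
 * BRW_SURFACEFORMAT_R64G64B64_FLOAT otherwise.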
239 */
240 return (doubles
241 ? double_types_passthru[size]
242 : double_types_float[size]);
243 }
244
245 static bool
246 is_passthru_format(uint32_t format)
247 {
248 switch (format) {
249 case BRW_SURFACEFORMAT_R64_PASSTHRU:
250 case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
251 case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
252 case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
253 return true;
254 default:
255 return false;
256 }
257 }
258
259 static int
260 uploads_needed(uint32_t format)
261 {
262 if (!is_passthru_format(format))
263 return 1;
264
265 switch (format) {
266 case BRW_SURFACEFORMAT_R64_PASSTHRU:
267 case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
268 return 1;
269 case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
270 case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
271 return 2;
272 default:
273 unreachable("not reached");
274 }
275 }
276
277 /*
278 * Returns the number of components associated with a format that is used in
279 * a 64-bit to 32-bit format split. See downsize_format_if_needed().
280 */
281 static int
282 upload_format_size(uint32_t upload_format)
283 {
284 switch (upload_format) {
285 case BRW_SURFACEFORMAT_R32G32_FLOAT:
286 return 2;
287 case BRW_SURFACEFORMAT_R32G32B32A32_FLOAT:
288 return 4;
289 default:
290 unreachable("not reached");
291 }
292 }
293
294 /*
295 * Returns the format that we will finally use when uploading a vertex
296 * element. It only changes if we are using *64*_PASSTHRU formats, as on
297 * gen < 8 they need to be split into two *32*_FLOAT formats.
298 *
299 * @upload indicates which of the uploads we are emitting. Valid values are [0,1].
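 *
 * For example, on gen < 8 a dvec3 uses BRW_SURFACEFORMAT_R64G64B64_PASSTHRU,
 * for which uploads_needed() returns 2: upload 0 is downsized to
 * R32G32B32A32_FLOAT (the raw 32-bit halves of the first two doubles) and
 * upload 1 to R32G32_FLOAT (the third double), with the second upload
 * reading 16 bytes further into the element (see brw_emit_vertices()).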
300 */
301 static uint32_t
302 downsize_format_if_needed(uint32_t format,
303 int upload)
304 {
305 assert(upload == 0 || upload == 1);
306
307 if (!is_passthru_format(format))
308 return format;
309
310 switch (format) {
311 case BRW_SURFACEFORMAT_R64_PASSTHRU:
312 return BRW_SURFACEFORMAT_R32G32_FLOAT;
313 case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
314 return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
315 case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
316 return !upload ? BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
317 : BRW_SURFACEFORMAT_R32G32_FLOAT;
318 case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
319 return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
320 default:
321 unreachable("not reached");
322 }
323 }
324
325 /**
326 * Given vertex array type/size/format/normalized info, return
327 * the appropriate hardware surface type.
328 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
329 */
330 unsigned
331 brw_get_vertex_surface_type(struct brw_context *brw,
332 const struct gl_vertex_array *glarray)
333 {
334 int size = glarray->Size;
335 const bool is_ivybridge_or_older =
336 brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell;
337
338 if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
339 fprintf(stderr, "type %s size %d normalized %d\n",
340 _mesa_enum_to_string(glarray->Type),
341 glarray->Size, glarray->Normalized);
342
343 if (glarray->Integer) {
344 assert(glarray->Format == GL_RGBA); /* sanity check */
345 switch (glarray->Type) {
346 case GL_INT: return int_types_direct[size];
347 case GL_SHORT:
348 if (is_ivybridge_or_older && size == 3)
349 return short_types_direct[4];
350 else
351 return short_types_direct[size];
352 case GL_BYTE:
353 if (is_ivybridge_or_older && size == 3)
354 return byte_types_direct[4];
355 else
356 return byte_types_direct[size];
357 case GL_UNSIGNED_INT: return uint_types_direct[size];
358 case GL_UNSIGNED_SHORT:
359 if (is_ivybridge_or_older && size == 3)
360 return ushort_types_direct[4];
361 else
362 return ushort_types_direct[size];
363 case GL_UNSIGNED_BYTE:
364 if (is_ivybridge_or_older && size == 3)
365 return ubyte_types_direct[4];
366 else
367 return ubyte_types_direct[size];
368 default: unreachable("not reached");
369 }
370 } else if (glarray->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
371 return BRW_SURFACEFORMAT_R11G11B10_FLOAT;
372 } else if (glarray->Normalized) {
373 switch (glarray->Type) {
374 case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
375 case GL_FLOAT: return float_types[size];
376 case GL_HALF_FLOAT:
377 case GL_HALF_FLOAT_OES:
378 if (brw->gen < 6 && size == 3)
379 return half_float_types[4];
380 else
381 return half_float_types[size];
382 case GL_INT: return int_types_norm[size];
383 case GL_SHORT: return short_types_norm[size];
384 case GL_BYTE: return byte_types_norm[size];
385 case GL_UNSIGNED_INT: return uint_types_norm[size];
386 case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
387 case GL_UNSIGNED_BYTE:
388 if (glarray->Format == GL_BGRA) {
389 /* See GL_EXT_vertex_array_bgra */
390 assert(size == 4);
391 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
392 }
393 else {
394 return ubyte_types_norm[size];
395 }
396 case GL_FIXED:
397 if (brw->gen >= 8 || brw->is_haswell)
398 return fixed_point_types[size];
399
400 /* This produces GL_FIXED inputs as values between INT32_MIN and
401 * INT32_MAX, which will be scaled down by 1/65536 by the VS.
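 *
 * For example, the GL_FIXED value 1.0 is stored as 0x00010000 (65536),
 * and the 1/65536 scale applied in the VS turns it back into 1.0f.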
402 */
403 return int_types_scale[size];
404 /* See GL_ARB_vertex_type_2_10_10_10_rev.
405 * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
406 * like to use here, so upload everything as UINT and fix
407 * it in the shader
408 */
409 case GL_INT_2_10_10_10_REV:
410 assert(size == 4);
411 if (brw->gen >= 8 || brw->is_haswell) {
412 return glarray->Format == GL_BGRA
413 ? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
414 : BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
415 }
416 return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
417 case GL_UNSIGNED_INT_2_10_10_10_REV:
418 assert(size == 4);
419 if (brw->gen >= 8 || brw->is_haswell) {
420 return glarray->Format == GL_BGRA
421 ? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
422 : BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
423 }
424 return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
425 default: unreachable("not reached");
426 }
427 }
428 else {
429 /* See GL_ARB_vertex_type_2_10_10_10_rev.
430 * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
431 * like to use here, so upload everything as UINT and fix
432 * it in the shader
433 */
434 if (glarray->Type == GL_INT_2_10_10_10_REV) {
435 assert(size == 4);
436 if (brw->gen >= 8 || brw->is_haswell) {
437 return glarray->Format == GL_BGRA
438 ? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
439 : BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
440 }
441 return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
442 } else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
443 assert(size == 4);
444 if (brw->gen >= 8 || brw->is_haswell) {
445 return glarray->Format == GL_BGRA
446 ? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
447 : BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
448 }
449 return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
450 }
451 assert(glarray->Format == GL_RGBA); /* sanity check */
452 switch (glarray->Type) {
453 case GL_DOUBLE: return double_types(brw, size, glarray->Doubles);
454 case GL_FLOAT: return float_types[size];
455 case GL_HALF_FLOAT:
456 case GL_HALF_FLOAT_OES:
457 if (brw->gen < 6 && size == 3)
458 return half_float_types[4];
459 else
460 return half_float_types[size];
461 case GL_INT: return int_types_scale[size];
462 case GL_SHORT: return short_types_scale[size];
463 case GL_BYTE: return byte_types_scale[size];
464 case GL_UNSIGNED_INT: return uint_types_scale[size];
465 case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
466 case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
467 case GL_FIXED:
468 if (brw->gen >= 8 || brw->is_haswell)
469 return fixed_point_types[size];
470
471 /* This produces GL_FIXED inputs as values between INT32_MIN and
472 * INT32_MAX, which will be scaled down by 1/65536 by the VS.
473 */
474 return int_types_scale[size];
475 default: unreachable("not reached");
476 }
477 }
478 }
479
480 static void
481 copy_array_to_vbo_array(struct brw_context *brw,
482 struct brw_vertex_element *element,
483 int min, int max,
484 struct brw_vertex_buffer *buffer,
485 GLuint dst_stride)
486 {
487 const int src_stride = element->glarray->StrideB;
488
489 /* If the source stride is zero, we just want to upload the current
490 * attribute once and set the buffer's stride to 0. There's no need
491 * to replicate it out.
492 */
493 if (src_stride == 0) {
494 intel_upload_data(brw, element->glarray->Ptr,
495 element->glarray->_ElementSize,
496 element->glarray->_ElementSize,
497 &buffer->bo, &buffer->offset);
498
499 buffer->stride = 0;
500 buffer->size = element->glarray->_ElementSize;
501 return;
502 }
503
504 const unsigned char *src = element->glarray->Ptr + min * src_stride;
505 int count = max - min + 1;
506 GLuint size = count * dst_stride;
507 uint8_t *dst = intel_upload_space(brw, size, dst_stride,
508 &buffer->bo, &buffer->offset);
509
510 /* The GL 4.5 spec says:
511 * "If any enabled array’s buffer binding is zero when DrawArrays or
512 * one of the other drawing commands defined in section 10.4 is called,
513 * the result is undefined."
514 *
515 * In this case, we just leave the dst with undefined values.
516 */
517 if (src != NULL) {
518 if (dst_stride == src_stride) {
519 memcpy(dst, src, size);
520 } else {
521 while (count--) {
522 memcpy(dst, src, dst_stride);
523 src += src_stride;
524 dst += dst_stride;
525 }
526 }
527 }
528 buffer->stride = dst_stride;
529 buffer->size = size;
530 }
531
532 void
533 brw_prepare_vertices(struct brw_context *brw)
534 {
535 struct gl_context *ctx = &brw->ctx;
536 /* BRW_NEW_VS_PROG_DATA */
537 const struct brw_vs_prog_data *vs_prog_data =
538 brw_vs_prog_data(brw->vs.base.prog_data);
539 GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
540 const unsigned char *ptr = NULL;
541 GLuint interleaved = 0;
542 unsigned int min_index = brw->vb.min_index + brw->basevertex;
543 unsigned int max_index = brw->vb.max_index + brw->basevertex;
544 unsigned i;
545 int delta, j;
546
547 struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
548 GLuint nr_uploads = 0;
549
550 /* _NEW_POLYGON
551 *
552 * On gen6+, edge flags don't end up in the VUE (either in or out of the
553 * VS). Instead, they're uploaded as the last vertex element, and the data
554 * is passed sideband through the fixed function units. So, we need to
555 * prepare the vertex buffer for it, but it's not present in inputs_read.
556 */
557 if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
558 ctx->Polygon.BackMode != GL_FILL)) {
559 vs_inputs |= VERT_BIT_EDGEFLAG;
560 }
561
562 if (0)
563 fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
564
565 /* Accumulate the list of enabled arrays. */
566 brw->vb.nr_enabled = 0;
567 while (vs_inputs) {
568 GLuint first = ffsll(vs_inputs) - 1;
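/* vs_inputs is non-zero here, so ffsll() returns a value in [1, 64] and
 * first is therefore always in [0, 63], keeping the 64-bit bitfield
 * operations below in range.
 */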
569 assert(first < 64);
570 GLuint index =
571 first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read &
572 BITFIELD64_MASK(first)), 2);
573 struct brw_vertex_element *input = &brw->vb.inputs[index];
574 input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0;
575 vs_inputs &= ~BITFIELD64_BIT(first);
576 if (input->is_dual_slot)
577 vs_inputs &= ~BITFIELD64_BIT(first + 1);
578 brw->vb.enabled[brw->vb.nr_enabled++] = input;
579 }
580
581 if (brw->vb.nr_enabled == 0)
582 return;
583
584 if (brw->vb.nr_buffers)
585 return;
586
587 /* The range of data in a given buffer represented as [min, max) */
588 struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX];
589 uint32_t buffer_range_start[VERT_ATTRIB_MAX];
590 uint32_t buffer_range_end[VERT_ATTRIB_MAX];
591
592 for (i = j = 0; i < brw->vb.nr_enabled; i++) {
593 struct brw_vertex_element *input = brw->vb.enabled[i];
594 const struct gl_vertex_array *glarray = input->glarray;
595
596 if (_mesa_is_bufferobj(glarray->BufferObj)) {
597 struct intel_buffer_object *intel_buffer =
598 intel_buffer_object(glarray->BufferObj);
599
600 const uint32_t offset = (uintptr_t)glarray->Ptr;
601
602 /* Start with the worst case */
603 uint32_t start = 0;
604 uint32_t range = intel_buffer->Base.Size;
605 if (glarray->InstanceDivisor) {
606 if (brw->num_instances) {
607 start = offset + glarray->StrideB * brw->baseinstance;
608 range = (glarray->StrideB * ((brw->num_instances - 1) /
609 glarray->InstanceDivisor) +
610 glarray->_ElementSize);
611 }
612 } else {
613 if (brw->vb.index_bounds_valid) {
614 start = offset + min_index * glarray->StrideB;
615 range = (glarray->StrideB * (max_index - min_index) +
616 glarray->_ElementSize);
617 }
618 }
619
620 /* If we have a VB set to be uploaded for this buffer object
621 * already, reuse that VB state so that we emit fewer
622 * relocations.
623 */
624 unsigned k;
625 for (k = 0; k < i; k++) {
626 const struct gl_vertex_array *other = brw->vb.enabled[k]->glarray;
627 if (glarray->BufferObj == other->BufferObj &&
628 glarray->StrideB == other->StrideB &&
629 glarray->InstanceDivisor == other->InstanceDivisor &&
630 (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
631 {
632 input->buffer = brw->vb.enabled[k]->buffer;
633 input->offset = glarray->Ptr - other->Ptr;
634
635 buffer_range_start[input->buffer] =
636 MIN2(buffer_range_start[input->buffer], start);
637 buffer_range_end[input->buffer] =
638 MAX2(buffer_range_end[input->buffer], start + range);
639 break;
640 }
641 }
642 if (k == i) {
643 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
644
645 /* Named buffer object: Just reference its contents directly. */
646 buffer->offset = offset;
647 buffer->stride = glarray->StrideB;
648 buffer->step_rate = glarray->InstanceDivisor;
649 buffer->size = glarray->BufferObj->Size - offset;
650
651 enabled_buffer[j] = intel_buffer;
652 buffer_range_start[j] = start;
653 buffer_range_end[j] = start + range;
654
655 input->buffer = j++;
656 input->offset = 0;
657 }
658 } else {
659 /* Queue the buffer object up to be uploaded in the next pass,
660 * when we've decided if we're doing interleaved or not.
661 */
662 if (nr_uploads == 0) {
663 interleaved = glarray->StrideB;
664 ptr = glarray->Ptr;
665 }
666 else if (interleaved != glarray->StrideB ||
667 glarray->Ptr < ptr ||
668 (uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
669 {
670 /* If our stride is different from the first attribute's stride,
671 * or if the first attribute's stride didn't cover our element,
672 * disable the interleaved upload optimization. The second case
673 * can most commonly occur in cases where there is a single vertex
674 * and, for example, the data is stored on the application's
675 * stack.
676 *
677 * NOTE: This will also disable the optimization in cases where
678 * the data is in a different order than the array indices.
679 * Something like:
680 *
681 * float data[...];
682 * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
683 * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
684 */
685 interleaved = 0;
686 }
687
688 upload[nr_uploads++] = input;
689 }
690 }
691
692 /* Now that we've set up all of the buffers, we walk through and reference
693 * each of them. We do this late so that we get the right size in each
694 * buffer and don't reference too little data.
695 */
696 for (i = 0; i < j; i++) {
697 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
698 if (buffer->bo)
699 continue;
700
701 const uint32_t start = buffer_range_start[i];
702 const uint32_t range = buffer_range_end[i] - buffer_range_start[i];
703
704 buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range);
705 drm_intel_bo_reference(buffer->bo);
706 }
707
708 /* If we need to upload all the arrays, then we can trim those arrays to
709 * only the used elements [min_index, max_index] so long as we adjust all
710 * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
711 */
712 brw->vb.start_vertex_bias = 0;
713 delta = min_index;
714 if (nr_uploads == brw->vb.nr_enabled) {
715 brw->vb.start_vertex_bias = -delta;
716 delta = 0;
717 }
718
719 /* Handle any arrays to be uploaded. */
720 if (nr_uploads > 1) {
721 if (interleaved) {
722 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
723 /* All uploads are interleaved, so upload the arrays together as
724 * interleaved. First, upload the contents and set up upload[0].
725 */
726 copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
727 buffer, interleaved);
728 buffer->offset -= delta * interleaved;
729 buffer->size += delta * interleaved;
730
731 for (i = 0; i < nr_uploads; i++) {
732 /* Then, just point upload[i] at upload[0]'s buffer. */
733 upload[i]->offset =
734 ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
735 upload[i]->buffer = j;
736 }
737 j++;
738
739 nr_uploads = 0;
740 }
741 }
742 /* Upload non-interleaved arrays */
743 for (i = 0; i < nr_uploads; i++) {
744 struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
745 if (upload[i]->glarray->InstanceDivisor == 0) {
746 copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
747 buffer, upload[i]->glarray->_ElementSize);
748 } else {
749 /* This is an instanced attribute, since its InstanceDivisor
750 * is not zero. Therefore, its data will be stepped after the
751 * instanced draw has been run InstanceDivisor times.
752 */
753 uint32_t instanced_attr_max_index =
754 (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
755 copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
756 buffer, upload[i]->glarray->_ElementSize);
757 }
758 buffer->offset -= delta * buffer->stride;
759 buffer->size += delta * buffer->stride;
760 buffer->step_rate = upload[i]->glarray->InstanceDivisor;
761 upload[i]->buffer = j++;
762 upload[i]->offset = 0;
763 }
764
765 brw->vb.nr_buffers = j;
766 }
767
768 void
769 brw_prepare_shader_draw_parameters(struct brw_context *brw)
770 {
771 const struct brw_vs_prog_data *vs_prog_data =
772 brw_vs_prog_data(brw->vs.base.prog_data);
773
774 /* For non-indirect draws, upload gl_BaseVertex. */
775 if ((vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) &&
776 brw->draw.draw_params_bo == NULL) {
777 intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
778 &brw->draw.draw_params_bo,
779 &brw->draw.draw_params_offset);
780 }
781
782 if (vs_prog_data->uses_drawid) {
783 intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
784 &brw->draw.draw_id_bo,
785 &brw->draw.draw_id_offset);
786 }
787 }
788
789 /**
790 * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
791 */
792 uint32_t *
793 brw_emit_vertex_buffer_state(struct brw_context *brw,
794 unsigned buffer_nr,
795 drm_intel_bo *bo,
796 unsigned start_offset,
797 unsigned end_offset,
798 unsigned stride,
799 unsigned step_rate,
800 uint32_t *__map)
801 {
802 struct gl_context *ctx = &brw->ctx;
803 uint32_t dw0;
804
805 if (brw->gen >= 8) {
806 dw0 = buffer_nr << GEN6_VB0_INDEX_SHIFT;
807 } else if (brw->gen >= 6) {
808 dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
809 (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
810 : GEN6_VB0_ACCESS_VERTEXDATA);
811 } else {
812 dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
813 (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
814 : BRW_VB0_ACCESS_VERTEXDATA);
815 }
816
817 if (brw->gen >= 7)
818 dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
819
820 switch (brw->gen) {
821 case 7:
822 dw0 |= GEN7_MOCS_L3 << 16;
823 break;
824 case 8:
825 dw0 |= BDW_MOCS_WB << 16;
826 break;
827 case 9:
828 dw0 |= SKL_MOCS_WB << 16;
829 break;
830 }
831
832 WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
833 "VBO stride %d too large, bad rendering may occur\n",
834 stride);
835 OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
836 if (brw->gen >= 8) {
837 OUT_RELOC64(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
838 /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
839 * Vertex Fetch (VF) Stage - State
840 *
841 * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
842 * VBState.BufferPitch", the address of the byte immediately beyond the
843 * last valid byte of the buffer is determined by
844 * "VBState.StartingBufferAddress + VBState.BufferSize".
845 */
846 OUT_BATCH(end_offset - start_offset);
847 } else if (brw->gen >= 5) {
848 OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
849 /* From the BSpec: 3D Pipeline Stages - 3D Pipeline Geometry -
850 * Vertex Fetch (VF) Stage - State
851 *
852 * Instead of "VBState.StartingBufferAddress + VBState.MaxIndex x
853 * VBState.BufferPitch", the address of the byte immediately beyond the
854 * last valid byte of the buffer is determined by
855 * "VBState.EndAddress + 1".
856 */
857 OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, end_offset - 1);
858 OUT_BATCH(step_rate);
859 } else {
860 OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, start_offset);
861 OUT_BATCH(0);
862 OUT_BATCH(step_rate);
863 }
864
865 return __map;
866 }
867
868 static void
869 brw_emit_vertices(struct brw_context *brw)
870 {
871 GLuint i;
872
873 brw_prepare_vertices(brw);
874 brw_prepare_shader_draw_parameters(brw);
875
876 brw_emit_query_begin(brw);
877
878 const struct brw_vs_prog_data *vs_prog_data =
879 brw_vs_prog_data(brw->vs.base.prog_data);
880
881 unsigned nr_elements = brw->vb.nr_enabled;
882 if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
883 vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
884 ++nr_elements;
885 if (vs_prog_data->uses_drawid)
886 nr_elements++;
887
888 /* If any of the formats of vb.enabled needs more than one upload, we need
889 * to add it to nr_elements. */
890 unsigned extra_uploads = 0;
891 for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
892 struct brw_vertex_element *input = brw->vb.enabled[i];
893 uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
894
895 if (uploads_needed(format) > 1)
896 extra_uploads++;
897 }
898 nr_elements += extra_uploads;
899
900 /* If the VS doesn't read any inputs (calculating vertex position from
901 * a state variable for some reason, for example), emit a single pad
902 * VERTEX_ELEMENT struct and bail.
903 *
904 * The stale VB state stays in place, but it doesn't do anything unless
905 * a VE loads from it.
906 */
907 if (nr_elements == 0) {
908 BEGIN_BATCH(3);
909 OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
910 if (brw->gen >= 6) {
911 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
912 GEN6_VE0_VALID |
913 (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
914 (0 << BRW_VE0_SRC_OFFSET_SHIFT));
915 } else {
916 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
917 BRW_VE0_VALID |
918 (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
919 (0 << BRW_VE0_SRC_OFFSET_SHIFT));
920 }
921 OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
922 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
923 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
924 (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
925 ADVANCE_BATCH();
926 return;
927 }
928
929 /* Now emit VB and VEP state packets.
930 */
931
932 const bool uses_draw_params =
933 vs_prog_data->uses_basevertex ||
934 vs_prog_data->uses_baseinstance;
935 const unsigned nr_buffers = brw->vb.nr_buffers +
936 uses_draw_params + vs_prog_data->uses_drawid;
937
938 if (nr_buffers) {
939 if (brw->gen >= 6) {
940 assert(nr_buffers <= 33);
941 } else {
942 assert(nr_buffers <= 17);
943 }
944
945 BEGIN_BATCH(1 + 4 * nr_buffers);
946 OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
947 for (i = 0; i < brw->vb.nr_buffers; i++) {
948 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
949 /* Prior to Haswell and Bay Trail we have to use 4-component formats
950 * to fake 3-component ones. In particular, we do this for
951 * half-float and 8 and 16-bit integer formats. This means that the
952 * vertex element may poke over the end of the buffer by 2 bytes.
953 */
954 unsigned padding =
955 (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
956 EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->offset,
957 buffer->offset + buffer->size + padding,
958 buffer->stride, buffer->step_rate);
959
960 }
961
962 if (uses_draw_params) {
963 EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
964 brw->draw.draw_params_bo,
965 brw->draw.draw_params_offset,
966 brw->draw.draw_params_bo->size,
967 0, /* stride */
968 0); /* step rate */
969 }
970
971 if (vs_prog_data->uses_drawid) {
972 EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
973 brw->draw.draw_id_bo,
974 brw->draw.draw_id_offset,
975 brw->draw.draw_id_bo->size,
976 0, /* stride */
977 0); /* step rate */
978 }
979
980 ADVANCE_BATCH();
981 }
982
983 /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
984 * for VertexID/InstanceID.
985 */
986 if (brw->gen >= 6) {
987 assert(nr_elements <= 34);
988 } else {
989 assert(nr_elements <= 18);
990 }
991
992 struct brw_vertex_element *gen6_edgeflag_input = NULL;
993
994 BEGIN_BATCH(1 + nr_elements * 2);
995 OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
996 for (i = 0; i < brw->vb.nr_enabled; i++) {
997 struct brw_vertex_element *input = brw->vb.enabled[i];
998 uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
999 uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
1000 uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
1001 uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
1002 uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
1003 unsigned num_uploads = 1;
1004 unsigned c;
1005
1006 num_uploads = uploads_needed(format);
1007
1008 if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
1009 /* Gen6+ passes edgeflag as sideband along with the vertex, instead
1010 * of in the VUE. We have to upload it sideband as the last vertex
1011 * element according to the B-Spec.
1012 */
1013 if (brw->gen >= 6) {
1014 gen6_edgeflag_input = input;
1015 continue;
1016 }
1017 }
1018
1019 for (c = 0; c < num_uploads; c++) {
1020 uint32_t upload_format = downsize_format_if_needed(format, c);
1021 /* If we need more than one upload, the offset stride is 128
1022 * bits (16 bytes), as each previous upload uses a full
1023 * entry. */
1024 unsigned int offset = input->offset + c * 16;
1025 int size = input->glarray->Size;
1026
1027 if (is_passthru_format(format))
1028 size = upload_format_size(upload_format);
1029
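/* Missing components are replaced with constants: 0 for x/y/z and 1 for w
 * (integer or float 1, depending on the array type). The fall-through
 * between the cases below is intentional.
 */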
1030 switch (size) {
1031 case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
1032 case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
1033 case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
1034 case 3: comp3 = input->glarray->Integer
1035 ? BRW_VE1_COMPONENT_STORE_1_INT
1036 : BRW_VE1_COMPONENT_STORE_1_FLT;
1037 break;
1038 }
1039
1040 if (brw->gen >= 6) {
1041 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
1042 GEN6_VE0_VALID |
1043 (upload_format << BRW_VE0_FORMAT_SHIFT) |
1044 (offset << BRW_VE0_SRC_OFFSET_SHIFT));
1045 } else {
1046 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
1047 BRW_VE0_VALID |
1048 (upload_format << BRW_VE0_FORMAT_SHIFT) |
1049 (offset << BRW_VE0_SRC_OFFSET_SHIFT));
1050 }
1051
1052 if (brw->gen >= 5)
1053 OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
1054 (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
1055 (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
1056 (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
1057 else
1058 OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
1059 (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
1060 (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
1061 (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
1062 ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
1063 }
1064 }
1065
1066 if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
1067 vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
1068 uint32_t dw0 = 0, dw1 = 0;
1069 uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
1070 uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
1071 uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
1072 uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
1073
1074 if (vs_prog_data->uses_basevertex)
1075 comp0 = BRW_VE1_COMPONENT_STORE_SRC;
1076
1077 if (vs_prog_data->uses_baseinstance)
1078 comp1 = BRW_VE1_COMPONENT_STORE_SRC;
1079
1080 if (vs_prog_data->uses_vertexid)
1081 comp2 = BRW_VE1_COMPONENT_STORE_VID;
1082
1083 if (vs_prog_data->uses_instanceid)
1084 comp3 = BRW_VE1_COMPONENT_STORE_IID;
1085
1086 dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
1087 (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
1088 (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
1089 (comp3 << BRW_VE1_COMPONENT_3_SHIFT);
1090
1091 if (brw->gen >= 6) {
1092 dw0 |= GEN6_VE0_VALID |
1093 brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
1094 BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
1095 } else {
1096 dw0 |= BRW_VE0_VALID |
1097 brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
1098 BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
1099 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
1100 }
1101
1102 /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
1103 * the format is ignored and the value is always int.
1104 */
1105
1106 OUT_BATCH(dw0);
1107 OUT_BATCH(dw1);
1108 }
1109
1110 if (vs_prog_data->uses_drawid) {
1111 uint32_t dw0 = 0, dw1 = 0;
1112
1113 dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
1114 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
1115 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
1116 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
1117
1118 if (brw->gen >= 6) {
1119 dw0 |= GEN6_VE0_VALID |
1120 ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
1121 (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
1122 } else {
1123 dw0 |= BRW_VE0_VALID |
1124 ((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
1125 (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
1126
1127 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
1128 }
1129
1130 OUT_BATCH(dw0);
1131 OUT_BATCH(dw1);
1132 }
1133
1134 if (brw->gen >= 6 && gen6_edgeflag_input) {
1135 uint32_t format =
1136 brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
1137
1138 OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
1139 GEN6_VE0_VALID |
1140 GEN6_VE0_EDGE_FLAG_ENABLE |
1141 (format << BRW_VE0_FORMAT_SHIFT) |
1142 (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
1143 OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
1144 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
1145 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
1146 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
1147 }
1148
1149 ADVANCE_BATCH();
1150 }
1151
1152 const struct brw_tracked_state brw_vertices = {
1153 .dirty = {
1154 .mesa = _NEW_POLYGON,
1155 .brw = BRW_NEW_BATCH |
1156 BRW_NEW_BLORP |
1157 BRW_NEW_VERTICES |
1158 BRW_NEW_VS_PROG_DATA,
1159 },
1160 .emit = brw_emit_vertices,
1161 };
1162
1163 static void
1164 brw_upload_indices(struct brw_context *brw)
1165 {
1166 struct gl_context *ctx = &brw->ctx;
1167 const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
1168 GLuint ib_size;
1169 drm_intel_bo *old_bo = brw->ib.bo;
1170 struct gl_buffer_object *bufferobj;
1171 GLuint offset;
1172 GLuint ib_type_size;
1173
1174 if (index_buffer == NULL)
1175 return;
1176
1177 ib_type_size = _mesa_sizeof_type(index_buffer->type);
1178 ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
1179 index_buffer->obj->Size;
1180 bufferobj = index_buffer->obj;
1181
1182 /* Turn into a proper VBO:
1183 */
1184 if (!_mesa_is_bufferobj(bufferobj)) {
1185 /* Get new bufferobj, offset:
1186 */
1187 intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
1188 &brw->ib.bo, &offset);
1189 brw->ib.size = brw->ib.bo->size;
1190 } else {
1191 offset = (GLuint) (unsigned long) index_buffer->ptr;
1192
1193 /* If the index buffer isn't aligned to its element size, we have to
1194 * rebase it into a temporary.
1195 */
1196 if ((ib_type_size - 1) & offset) {
1197 perf_debug("copying index buffer to a temporary to work around "
1198 "misaligned offset %d\n", offset);
1199
1200 GLubyte *map = ctx->Driver.MapBufferRange(ctx,
1201 offset,
1202 ib_size,
1203 GL_MAP_READ_BIT,
1204 bufferobj,
1205 MAP_INTERNAL);
1206
1207 intel_upload_data(brw, map, ib_size, ib_type_size,
1208 &brw->ib.bo, &offset);
1209 brw->ib.size = brw->ib.bo->size;
1210
1211 ctx->Driver.UnmapBuffer(ctx, bufferobj, MAP_INTERNAL);
1212 } else {
1213 drm_intel_bo *bo =
1214 intel_bufferobj_buffer(brw, intel_buffer_object(bufferobj),
1215 offset, ib_size);
1216 if (bo != brw->ib.bo) {
1217 drm_intel_bo_unreference(brw->ib.bo);
1218 brw->ib.bo = bo;
1219 brw->ib.size = bufferobj->Size;
1220 drm_intel_bo_reference(bo);
1221 }
1222 }
1223 }
1224
1225 /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
1226 * the index buffer state when we're just moving the start index
1227 * of our drawing.
1228 */
1229 brw->ib.start_vertex_offset = offset / ib_type_size;
1230
1231 if (brw->ib.bo != old_bo)
1232 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
1233
1234 if (index_buffer->type != brw->ib.type) {
1235 brw->ib.type = index_buffer->type;
1236 brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
1237 }
1238 }
1239
1240 const struct brw_tracked_state brw_indices = {
1241 .dirty = {
1242 .mesa = 0,
1243 .brw = BRW_NEW_BLORP |
1244 BRW_NEW_INDICES,
1245 },
1246 .emit = brw_upload_indices,
1247 };
1248
1249 static void
1250 brw_emit_index_buffer(struct brw_context *brw)
1251 {
1252 const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
1253 GLuint cut_index_setting;
1254
1255 if (index_buffer == NULL)
1256 return;
1257
1258 if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
1259 cut_index_setting = BRW_CUT_INDEX_ENABLE;
1260 } else {
1261 cut_index_setting = 0;
1262 }
1263
1264 BEGIN_BATCH(3);
1265 OUT_BATCH(CMD_INDEX_BUFFER << 16 |
1266 cut_index_setting |
1267 brw_get_index_type(index_buffer->type) |
1268 1);
1269 OUT_RELOC(brw->ib.bo,
1270 I915_GEM_DOMAIN_VERTEX, 0,
1271 0);
1272 OUT_RELOC(brw->ib.bo,
1273 I915_GEM_DOMAIN_VERTEX, 0,
1274 brw->ib.size - 1);
1275 ADVANCE_BATCH();
1276 }
1277
1278 const struct brw_tracked_state brw_index_buffer = {
1279 .dirty = {
1280 .mesa = 0,
1281 .brw = BRW_NEW_BATCH |
1282 BRW_NEW_BLORP |
1283 BRW_NEW_INDEX_BUFFER,
1284 },
1285 .emit = brw_emit_index_buffer,
1286 };