/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <stdlib.h>

#include "glheader.h"
#include "context.h"
#include "state.h"
#include "api_validate.h"
#include "enums.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_aub.h"
#include "brw_state.h"
#include "brw_fallback.h"

#include "intel_ioctl.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"

#include "tnl/tnl.h"
#include "vbo/vbo_context.h"


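/* Map GL primitive types onto the hardware 3DPRIM topology codes.
 * GL_POINTS .. GL_POLYGON are the enum values 0..9, so the table is
 * indexed directly by the GL enum.
 */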
static GLuint hw_prim[GL_POLYGON+1] = {
   _3DPRIM_POINTLIST,
   _3DPRIM_LINELIST,
   _3DPRIM_LINELOOP,
   _3DPRIM_LINESTRIP,
   _3DPRIM_TRILIST,
   _3DPRIM_TRISTRIP,
   _3DPRIM_TRIFAN,
   _3DPRIM_QUADLIST,
   _3DPRIM_QUADSTRIP,
   _3DPRIM_POLYGON
};

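/* Collapse each primitive type to its base class (points, lines or
 * triangles), for state that only depends on this distinction.
 */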
static const GLenum reduced_prim[GL_POLYGON+1] = {
   GL_POINTS,
   GL_LINES,
   GL_LINES,
   GL_LINES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES,
   GL_TRIANGLES
};

/* When the primitive changes, set a state bit and re-validate.  Not
 * the nicest solution; it would be better to have all the programs be
 * immune to the active primitive (i.e. cope with all possibilities).
 * That may not be realistic, however.
 */
static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
{
   if (INTEL_DEBUG & DEBUG_PRIMS)
      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));

   /* Slight optimization to avoid the GS program when not needed:
    */
   if (prim == GL_QUAD_STRIP &&
       brw->attribs.Light->ShadeModel != GL_FLAT &&
       brw->attribs.Polygon->FrontMode == GL_FILL &&
       brw->attribs.Polygon->BackMode == GL_FILL)
      prim = GL_TRIANGLE_STRIP;

   if (prim != brw->primitive) {
      brw->primitive = prim;
      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;

      if (reduced_prim[prim] != brw->intel.reduced_primitive) {
         brw->intel.reduced_primitive = reduced_prim[prim];
         brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
      }

      brw_validate_state(brw);
   }

   return hw_prim[prim];
}

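/* Quads and quad strips have to be emitted as whole quads; drop any
 * trailing vertices that cannot form a complete quad.
 */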
static GLuint trim(GLenum prim, GLuint length)
{
   if (prim == GL_QUAD_STRIP)
      return length > 3 ? (length - length % 2) : 0;
   else if (prim == GL_QUADS)
      return length - length % 4;
   else
      return length;
}

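/* Emit a drawing rectangle command restricting rendering to a single
 * cliprect, offset by the current drawable origin.
 */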
static void brw_emit_cliprect( struct brw_context *brw,
                               const drm_clip_rect_t *rect )
{
   struct brw_drawrect bdr;

   bdr.header.opcode = CMD_DRAW_RECT;
   bdr.header.length = sizeof(bdr)/4 - 2;
   bdr.xmin = rect->x1;
   bdr.xmax = rect->x2 - 1;
   bdr.ymin = rect->y1;
   bdr.ymax = rect->y2 - 1;
   bdr.xorg = brw->intel.drawX;
   bdr.yorg = brw->intel.drawY;

   intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr),
                           INTEL_BATCH_NO_CLIPRECTS);
}

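/* Build and emit a 3D primitive command for a single _mesa_prim.
 * Nothing is emitted if trimming leaves no complete primitives.
 */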
static void brw_emit_prim( struct brw_context *brw,
                           const struct _mesa_prim *prim )
{
   struct brw_3d_primitive prim_packet;

   if (INTEL_DEBUG & DEBUG_PRIMS)
      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
                   prim->start, prim->count);

   prim_packet.header.opcode = CMD_3D_PRIM;
   prim_packet.header.length = sizeof(prim_packet)/4 - 2;
   prim_packet.header.pad = 0;
   prim_packet.header.topology = brw_set_prim(brw, prim->mode);
   prim_packet.header.indexed = prim->indexed;

   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
   prim_packet.start_vert_location = prim->start;
   prim_packet.instance_count = 1;
   prim_packet.start_instance_location = 0;
   prim_packet.base_vert_location = 0;

   if (prim_packet.verts_per_instance) {
      intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet),
                              INTEL_BATCH_NO_CLIPRECTS);
   }
}

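/* Bind the incoming client arrays to brw->vb.inputs and recompute the
 * per-attribute size bits and the set of varying (nonzero-stride)
 * attributes, raising state changes when either differs from the
 * previous draw.
 */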
static void brw_merge_inputs( struct brw_context *brw,
                              const struct gl_client_array *arrays[])
{
   struct brw_vertex_element *inputs = brw->vb.inputs;
   struct brw_vertex_info old = brw->vb.info;
   GLuint i;

   memset(inputs, 0, sizeof(*inputs));
   memset(&brw->vb.info, 0, sizeof(brw->vb.info));

   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
      brw->vb.inputs[i].glarray = arrays[i];

      /* XXX: metaops passes null arrays */
      if (arrays[i]) {
         if (arrays[i]->StrideB != 0)
            brw->vb.info.varying |= 1 << i;

         brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2);
      }
   }

   /* Raise state changes if the input sizes or varying bits have changed:
    */
   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;

   if (brw->vb.info.varying != old.varying)
      brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING;
}

/* XXX: could split the primitive list to fall back only on the
 * non-conformant primitives.
 */
static GLboolean check_fallbacks( struct brw_context *brw,
                                  const struct _mesa_prim *prim,
                                  GLuint nr_prims )
{
   GLuint i;

   if (!brw->intel.strict_conformance)
      return GL_FALSE;

   if (brw->attribs.Polygon->SmoothFlag) {
      for (i = 0; i < nr_prims; i++)
         if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
            return GL_TRUE;
   }

   /* BRW hardware will do AA lines, but they appear to be
    * non-conformant.  TBD whether we keep this fallback:
    */
   if (brw->attribs.Line->SmoothFlag) {
      for (i = 0; i < nr_prims; i++)
         if (reduced_prim[prim[i].mode] == GL_LINES)
            return GL_TRUE;
   }

   /* Stipple -- these fallbacks could be resolved with a little
    * bit of work?
    */
   if (brw->attribs.Line->StippleFlag) {
      for (i = 0; i < nr_prims; i++) {
         /* GS doesn't get enough information to know when to reset
          * the stipple counter?!?
          */
         if (prim[i].mode == GL_LINE_LOOP)
            return GL_TRUE;

         if (prim[i].mode == GL_POLYGON &&
             (brw->attribs.Polygon->FrontMode == GL_LINE ||
              brw->attribs.Polygon->BackMode == GL_LINE))
            return GL_TRUE;
      }
   }

   if (brw->attribs.Point->SmoothFlag) {
      for (i = 0; i < nr_prims; i++)
         if (prim[i].mode == GL_POINTS)
            return GL_TRUE;
   }

   return GL_FALSE;
}

/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( GLcontext *ctx,
                                     const struct gl_client_array *arrays[],
                                     const struct _mesa_prim *prim,
                                     GLuint nr_prims,
                                     const struct _mesa_index_buffer *ib,
                                     GLuint min_index,
                                     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i, j;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }

   {
      /* Set the first primitive early, ahead of validate_state:
       */
      brw_set_prim(brw, prim[0].mode);

      /* XXX: Need to separate validate and upload of state.
       */
      brw_validate_state( brw );

      /* Various fallback checks:
       */
      if (brw->intel.Fallback)
         goto out;

      if (check_fallbacks( brw, prim, nr_prims ))
         goto out;

      /* Upload index, vertex data:
       */
      if (ib)
         brw_upload_indices( brw, ib );

      if (!brw_upload_vertices( brw, min_index, max_index)) {
         goto out;
      }

      /* For a single cliprect, state is already emitted:
       */
      if (brw->intel.numClipRects == 1) {
         for (i = 0; i < nr_prims; i++) {
            brw_emit_prim(brw, &prim[i]);
         }
      }
      else {
         /* Otherwise, explicitly do the cliprects at this point:
          */
         for (j = 0; j < brw->intel.numClipRects; j++) {
            brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);

            /* Emit prims to batchbuffer:
             */
            for (i = 0; i < nr_prims; i++) {
               brw_emit_prim(brw, &prim[i]);
            }
         }
      }

      intel->need_flush = GL_TRUE;
      retval = GL_TRUE;
   }

 out:

   /* Currently have to do this to synchronize with the map/unmap of
    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
    * way around this, as not every flush is due to a buffer filling
    * up.
    */
   if (!intel_batchbuffer_flush( brw->intel.batch )) {
      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
      retval = GL_FALSE;
   }

   if (retval && intel->thrashing) {
      bmSetFence(intel);
   }

   /* Free any old data so it doesn't clog up texture memory - we
    * won't be referencing it again.
    */
   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
      ctx->Driver.BufferData(ctx,
                             GL_ARRAY_BUFFER_ARB,
                             BRW_UPLOAD_INIT_SIZE,
                             NULL,
                             GL_DYNAMIC_DRAW_ARB,
                             brw->vb.upload.vbo[brw->vb.upload.wrap]);
      brw->vb.upload.wrap++;
      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
   }

   UNLOCK_HARDWARE(intel);

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}

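/* Decide whether the arrays/indices need to be rebased to a zero
 * min_index before drawing.  Rebasing is avoided only when min_index
 * is already zero or everything already lives in VBOs.
 */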
static GLboolean brw_need_rebase( GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_index_buffer *ib,
                                  GLuint min_index )
{
   if (min_index == 0)
      return GL_FALSE;

   if (ib) {
      if (!vbo_all_varyings_in_vbos(arrays))
         return GL_TRUE;
      else
         return GL_FALSE;
   }
   else {
      /* Hmm.  This isn't quite what I wanted.  BRW can actually
       * handle the mixed case well enough that we shouldn't need to
       * rebase.  However, it's probably not very common, nor hugely
       * expensive to do it this way:
       */
      if (!vbo_all_varyings_in_vbos(arrays))
         return GL_TRUE;
      else
         return GL_FALSE;
   }
}

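/* Top-level drawing entry point, registered with the vbo module.
 * Tries the hardware path, retries once after an out-of-memory style
 * failure, and finally hands the draw to the software tnl module.
 */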
void brw_draw_prims( GLcontext *ctx,
                     const struct gl_client_array *arrays[],
                     const struct _mesa_prim *prim,
                     GLuint nr_prims,
                     const struct _mesa_index_buffer *ib,
                     GLuint min_index,
                     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   GLboolean retval;

   /* Decide if we want to rebase.  If so we end up recursing once
    * only into this function.
    */
   if (brw_need_rebase( ctx, arrays, ib, min_index )) {
      vbo_rebase_prims( ctx, arrays,
                        prim, nr_prims,
                        ib, min_index, max_index,
                        brw_draw_prims );

      return;
   }

   /* Make a first attempt at drawing:
    */
   retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);

   /* This looks like out-of-memory but potentially we have a
    * situation where there is enough memory but it has become
    * fragmented.  Clear out all heaps and start from scratch by
    * faking a contended lock event: (done elsewhere)
    */
   if (!retval && !intel->Fallback && bmError(intel)) {
      DBG("retrying\n");
      /* Then try a second time only to upload textures and draw the
       * primitives:
       */
      retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
   }

   /* Otherwise, we really are out of memory.  Pass the drawing
    * command to the software tnl module, which will in turn call
    * swrast to do the drawing.
    */
   if (!retval) {
      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
   }

   if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) {
      intelFinish( &intel->ctx );
      intel->aub_wrap = 1;
   }
}

static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
{
   /* nothing to do, we don't rely on the contents being preserved */
}

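/* One-time setup: register the draw entry point with the vbo module
 * and create the ring of VBOs used for uploading client vertex
 * arrays.
 */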
void brw_draw_init( struct brw_context *brw )
{
   GLcontext *ctx = &brw->intel.ctx;
   struct vbo_context *vbo = vbo_context(ctx);
   GLuint i;

   /* Register our drawing function:
    */
   vbo->draw_prims = brw_draw_prims;

   brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;

   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
      brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);

      /* NOTE: These are set to no-backing-store.
       */
      bmBufferSetInvalidateCB(&brw->intel,
                              intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])),
                              brw_invalidate_vbo_cb,
                              &brw->intel,
                              GL_TRUE);
   }

   ctx->Driver.BufferData( ctx,
                           GL_ARRAY_BUFFER_ARB,
                           BRW_UPLOAD_INIT_SIZE,
                           NULL,
                           GL_DYNAMIC_DRAW_ARB,
                           brw->vb.upload.vbo[0] );
}

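/* Tear down the upload VBOs created in brw_draw_init().
 */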
void brw_draw_destroy( struct brw_context *brw )
{
   GLcontext *ctx = &brw->intel.ctx;
   GLuint i;

   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++)
      ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]);
}