Use new rebase helper. Remove other rebase code.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_draw.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdlib.h>
29
30 #include "glheader.h"
31 #include "context.h"
32 #include "state.h"
33 #include "api_validate.h"
34 #include "enums.h"
35
36 #include "brw_draw.h"
37 #include "brw_defines.h"
38 #include "brw_context.h"
39 #include "brw_aub.h"
40 #include "brw_state.h"
41 #include "brw_fallback.h"
42
43 #include "intel_ioctl.h"
44 #include "intel_batchbuffer.h"
45 #include "intel_buffer_objects.h"
46
47 #include "tnl/tnl.h"
48 #include "vbo/vbo_context.h"
49
50
51
52
53 static GLuint hw_prim[GL_POLYGON+1] = {
54 _3DPRIM_POINTLIST,
55 _3DPRIM_LINELIST,
56 _3DPRIM_LINELOOP,
57 _3DPRIM_LINESTRIP,
58 _3DPRIM_TRILIST,
59 _3DPRIM_TRISTRIP,
60 _3DPRIM_TRIFAN,
61 _3DPRIM_QUADLIST,
62 _3DPRIM_QUADSTRIP,
63 _3DPRIM_POLYGON
64 };
65
66
67 static const GLenum reduced_prim[GL_POLYGON+1] = {
68 GL_POINTS,
69 GL_LINES,
70 GL_LINES,
71 GL_LINES,
72 GL_TRIANGLES,
73 GL_TRIANGLES,
74 GL_TRIANGLES,
75 GL_TRIANGLES,
76 GL_TRIANGLES,
77 GL_TRIANGLES
78 };
79
80
81 /* When the primitive changes, set a state bit and re-validate. Not
82 * the nicest and would rather deal with this by having all the
83 * programs be immune to the active primitive (ie. cope with all
84 * possibilities). That may not be realistic however.
85 */
86 static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
87 {
88 if (INTEL_DEBUG & DEBUG_PRIMS)
89 _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
90
91 /* Slight optimization to avoid the GS program when not needed:
92 */
93 if (prim == GL_QUAD_STRIP &&
94 brw->attribs.Light->ShadeModel != GL_FLAT &&
95 brw->attribs.Polygon->FrontMode == GL_FILL &&
96 brw->attribs.Polygon->BackMode == GL_FILL)
97 prim = GL_TRIANGLE_STRIP;
98
99 if (prim != brw->primitive) {
100 brw->primitive = prim;
101 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
102
103 if (reduced_prim[prim] != brw->intel.reduced_primitive) {
104 brw->intel.reduced_primitive = reduced_prim[prim];
105 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
106 }
107
108 brw_validate_state(brw);
109 }
110
111 return hw_prim[prim];
112 }
113
114
115 static GLuint trim(GLenum prim, GLuint length)
116 {
117 if (prim == GL_QUAD_STRIP)
118 return length > 3 ? (length - length % 2) : 0;
119 else if (prim == GL_QUADS)
120 return length - length % 4;
121 else
122 return length;
123 }
124
125
126 static void brw_emit_cliprect( struct brw_context *brw,
127 const drm_clip_rect_t *rect )
128 {
129 struct brw_drawrect bdr;
130
131 bdr.header.opcode = CMD_DRAW_RECT;
132 bdr.header.length = sizeof(bdr)/4 - 2;
133 bdr.xmin = rect->x1;
134 bdr.xmax = rect->x2 - 1;
135 bdr.ymin = rect->y1;
136 bdr.ymax = rect->y2 - 1;
137 bdr.xorg = brw->intel.drawX;
138 bdr.yorg = brw->intel.drawY;
139
140 intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr),
141 INTEL_BATCH_NO_CLIPRECTS);
142 }
143
144
145 static void brw_emit_prim( struct brw_context *brw,
146 const struct _mesa_prim *prim )
147
148 {
149 struct brw_3d_primitive prim_packet;
150
151 if (INTEL_DEBUG & DEBUG_PRIMS)
152 _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
153 prim->start, prim->count);
154
155 prim_packet.header.opcode = CMD_3D_PRIM;
156 prim_packet.header.length = sizeof(prim_packet)/4 - 2;
157 prim_packet.header.pad = 0;
158 prim_packet.header.topology = brw_set_prim(brw, prim->mode);
159 prim_packet.header.indexed = prim->indexed;
160
161 prim_packet.verts_per_instance = trim(prim->mode, prim->count);
162 prim_packet.start_vert_location = prim->start;
163 prim_packet.instance_count = 1;
164 prim_packet.start_instance_location = 0;
165 prim_packet.base_vert_location = 0;
166
167 if (prim_packet.verts_per_instance) {
168 intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet),
169 INTEL_BATCH_NO_CLIPRECTS);
170 }
171 }
172
173 static void brw_merge_inputs( struct brw_context *brw,
174 const struct gl_client_array *arrays[])
175 {
176 struct brw_vertex_element *inputs = brw->vb.inputs;
177 struct brw_vertex_info old = brw->vb.info;
178 GLuint i;
179
180 memset(inputs, 0, sizeof(*inputs));
181 memset(&brw->vb.info, 0, sizeof(brw->vb.info));
182
183 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
184 brw->vb.inputs[i].glarray = arrays[i];
185
186 if (arrays[i]->StrideB != 0)
187 brw->vb.info.varying |= 1 << i;
188
189 brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2);
190 }
191
192 /* Raise statechanges if input sizes and varying have changed:
193 */
194 if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
195 brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
196
197 if (brw->vb.info.varying != old.varying)
198 brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING;
199 }
200
201 /* XXX: could split the primitive list to fallback only on the
202 * non-conformant primitives.
203 */
204 static GLboolean check_fallbacks( struct brw_context *brw,
205 const struct _mesa_prim *prim,
206 GLuint nr_prims )
207 {
208 GLuint i;
209
210 if (!brw->intel.strict_conformance)
211 return GL_FALSE;
212
213 if (brw->attribs.Polygon->SmoothFlag) {
214 for (i = 0; i < nr_prims; i++)
215 if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
216 return GL_TRUE;
217 }
218
219 /* BRW hardware will do AA lines, but they are non-conformant it
220 * seems. TBD whether we keep this fallback:
221 */
222 if (brw->attribs.Line->SmoothFlag) {
223 for (i = 0; i < nr_prims; i++)
224 if (reduced_prim[prim[i].mode] == GL_LINES)
225 return GL_TRUE;
226 }
227
228 /* Stipple -- these fallbacks could be resolved with a little
229 * bit of work?
230 */
231 if (brw->attribs.Line->StippleFlag) {
232 for (i = 0; i < nr_prims; i++) {
233 /* GS doesn't get enough information to know when to reset
234 * the stipple counter?!?
235 */
236 if (prim[i].mode == GL_LINE_LOOP)
237 return GL_TRUE;
238
239 if (prim[i].mode == GL_POLYGON &&
240 (brw->attribs.Polygon->FrontMode == GL_LINE ||
241 brw->attribs.Polygon->BackMode == GL_LINE))
242 return GL_TRUE;
243 }
244 }
245
246
247 if (brw->attribs.Point->SmoothFlag) {
248 for (i = 0; i < nr_prims; i++)
249 if (prim[i].mode == GL_POINTS)
250 return GL_TRUE;
251 }
252
253 return GL_FALSE;
254 }
255
256 /* May fail if out of video memory for texture or vbo upload, or on
257 * fallback conditions.
258 */
259 static GLboolean brw_try_draw_prims( GLcontext *ctx,
260 const struct gl_client_array *arrays[],
261 const struct _mesa_prim *prim,
262 GLuint nr_prims,
263 const struct _mesa_index_buffer *ib,
264 GLuint min_index,
265 GLuint max_index )
266 {
267 struct intel_context *intel = intel_context(ctx);
268 struct brw_context *brw = brw_context(ctx);
269 GLboolean retval = GL_FALSE;
270 GLuint i, j;
271
272 if (ctx->NewState)
273 _mesa_update_state( ctx );
274
275 /* Bind all inputs, derive varying and size information:
276 */
277 brw_merge_inputs( brw, arrays );
278
279 /* Have to validate state quite late. Will rebuild tnl_program,
280 * which depends on varying information.
281 *
282 * Note this is where brw->vs->prog_data.inputs_read is calculated,
283 * so can't access it earlier.
284 */
285
286 LOCK_HARDWARE(intel);
287
288 if (brw->intel.numClipRects == 0) {
289 assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
290 UNLOCK_HARDWARE(intel);
291 return GL_TRUE;
292 }
293
294 {
295 /* Set the first primitive early, ahead of validate_state:
296 */
297 brw_set_prim(brw, prim[0].mode);
298
299 /* XXX: Need to separate validate and upload of state.
300 */
301 brw_validate_state( brw );
302
303 /* Various fallback checks:
304 */
305 if (brw->intel.Fallback)
306 goto out;
307
308 if (check_fallbacks( brw, prim, nr_prims ))
309 goto out;
310
311 /* Upload index, vertex data:
312 */
313 if (ib)
314 brw_upload_indices( brw, ib );
315
316 if (!brw_upload_vertices( brw, min_index, max_index)) {
317 goto out;
318 }
319
320 /* For single cliprect, state is already emitted:
321 */
322 if (brw->intel.numClipRects == 1) {
323 for (i = 0; i < nr_prims; i++) {
324 brw_emit_prim(brw, &prim[i]);
325 }
326 }
327 else {
328 /* Otherwise, explicitly do the cliprects at this point:
329 */
330 for (j = 0; j < brw->intel.numClipRects; j++) {
331 brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);
332
333 /* Emit prims to batchbuffer:
334 */
335 for (i = 0; i < nr_prims; i++) {
336 brw_emit_prim(brw, &prim[i]);
337 }
338 }
339 }
340
341 intel->need_flush = GL_TRUE;
342 retval = GL_TRUE;
343 }
344
345 out:
346
347 /* Currently have to do this to synchronize with the map/unmap of
348 * the vertex buffer in brw_exec_api.c. Not sure if there is any
349 * way around this, as not every flush is due to a buffer filling
350 * up.
351 */
352 if (!intel_batchbuffer_flush( brw->intel.batch )) {
353 DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
354 retval = GL_FALSE;
355 }
356
357 if (retval && intel->thrashing) {
358 bmSetFence(intel);
359 }
360
361 /* Free any old data so it doesn't clog up texture memory - we
362 * won't be referencing it again.
363 */
364 while (brw->vb.upload.wrap != brw->vb.upload.buf) {
365 ctx->Driver.BufferData(ctx,
366 GL_ARRAY_BUFFER_ARB,
367 BRW_UPLOAD_INIT_SIZE,
368 NULL,
369 GL_DYNAMIC_DRAW_ARB,
370 brw->vb.upload.vbo[brw->vb.upload.wrap]);
371 brw->vb.upload.wrap++;
372 brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
373 }
374
375 UNLOCK_HARDWARE(intel);
376
377 if (!retval)
378 DBG("%s failed\n", __FUNCTION__);
379
380 return retval;
381 }
382
383 static GLboolean brw_need_rebase( GLcontext *ctx,
384 const struct gl_client_array *arrays[],
385 const struct _mesa_index_buffer *ib,
386 GLuint min_index )
387 {
388 if (min_index == 0)
389 return GL_FALSE;
390
391 if (ib) {
392 if (!vbo_all_varyings_in_vbos(arrays))
393 return GL_TRUE;
394 else
395 return GL_FALSE;
396 }
397 else {
398 /* Hmm. This isn't quite what I wanted. BRW can actually
399 * handle the mixed case well enough that we shouldn't need to
400 * rebase. However, it's probably not very common, nor hugely
401 * expensive to do it this way:
402 */
403 if (!vbo_all_varyings_in_vbos(arrays))
404 return GL_TRUE;
405 else
406 return GL_FALSE;
407 }
408 }
409
410
411 void brw_draw_prims( GLcontext *ctx,
412 const struct gl_client_array *arrays[],
413 const struct _mesa_prim *prim,
414 GLuint nr_prims,
415 const struct _mesa_index_buffer *ib,
416 GLuint min_index,
417 GLuint max_index )
418 {
419 struct intel_context *intel = intel_context(ctx);
420 GLboolean retval;
421
422 /* Decide if we want to rebase. If so we end up recursing once
423 * only into this function.
424 */
425 if (brw_need_rebase( ctx, arrays, ib, min_index )) {
426 vbo_rebase_prims( ctx, arrays,
427 prim, nr_prims,
428 ib, min_index, max_index,
429 brw_draw_prims );
430
431 return;
432 }
433
434
435 /* Make a first attempt at drawing:
436 */
437 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
438
439
440 /* This looks like out-of-memory but potentially we have
441 * situation where there is enough memory but it has become
442 * fragmented. Clear out all heaps and start from scratch by
443 * faking a contended lock event: (done elsewhere)
444 */
445 if (!retval && !intel->Fallback && bmError(intel)) {
446 DBG("retrying\n");
447 /* Then try a second time only to upload textures and draw the
448 * primitives:
449 */
450 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
451 }
452
453 /* Otherwise, we really are out of memory. Pass the drawing
454 * command to the software tnl module and which will in turn call
455 * swrast to do the drawing.
456 */
457 if (!retval) {
458 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
459 }
460
461 if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) {
462 intelFinish( &intel->ctx );
463 intel->aub_wrap = 1;
464 }
465 }
466
467
/* Invalidate callback registered for the upload buffers in
 * brw_draw_init().  Deliberately empty: we don't rely on the buffer
 * contents being preserved.
 */
static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
{
   (void) intel;
   (void) ptr;
}
472
473
474 void brw_draw_init( struct brw_context *brw )
475 {
476 GLcontext *ctx = &brw->intel.ctx;
477 struct vbo_context *vbo = vbo_context(ctx);
478 GLuint i;
479
480 /* Register our drawing function:
481 */
482 vbo->draw_prims = brw_draw_prims;
483
484 brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;
485
486 for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
487 brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
488
489 /* NOTE: These are set to no-backing-store.
490 */
491 bmBufferSetInvalidateCB(&brw->intel,
492 intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])),
493 brw_invalidate_vbo_cb,
494 &brw->intel,
495 GL_TRUE);
496 }
497
498 ctx->Driver.BufferData( ctx,
499 GL_ARRAY_BUFFER_ARB,
500 BRW_UPLOAD_INIT_SIZE,
501 NULL,
502 GL_DYNAMIC_DRAW_ARB,
503 brw->vb.upload.vbo[0] );
504 }
505
506 void brw_draw_destroy( struct brw_context *brw )
507 {
508 GLcontext *ctx = &brw->intel.ctx;
509 GLuint i;
510
511 for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++)
512 ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]);
513 }