i965: Move intel_context::no_batch_wrap to brw_context.
src/mesa/drivers/dri/i965/intel_batchbuffer.c
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "brw_context.h"

static void
intel_batchbuffer_reset(struct brw_context *brw);

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

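/* Free every entry on the cached-batch-item list and leave the list empty. */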
static void
clear_cache(struct brw_context *brw)
{
   struct cached_batch_item *item = brw->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   brw->batch.cached_items = NULL;
}

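/**
 * Set up batchbuffer state at context creation: the first batch BO is
 * allocated by intel_batchbuffer_reset(), gen6+ additionally gets a small
 * scratch BO for PIPE_CONTROL workaround writes, and non-LLC platforms get
 * a malloc'd CPU shadow of the batch that is uploaded at flush time.
 */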
void
intel_batchbuffer_init(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   intel_batchbuffer_reset(brw);

   if (intel->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                    "pipe_control workaround",
                                                    4096, 4096);
   }

   if (!intel->has_llc) {
      brw->batch.cpu_map = malloc(BATCH_SZ);
      brw->batch.map = brw->batch.cpu_map;
   }
}

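/**
 * Start a fresh batch: drop the reference to the previous batch BO (keeping
 * it around as last_bo), throw away the cached-item list, allocate and (on
 * LLC platforms) map a new BATCH_SZ buffer, and reset the bookkeeping fields.
 */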
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   if (brw->batch.last_bo != NULL) {
      drm_intel_bo_unreference(brw->batch.last_bo);
      brw->batch.last_bo = NULL;
   }
   brw->batch.last_bo = brw->batch.bo;

   clear_cache(brw);

   brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
                                      BATCH_SZ, 4096);
   if (intel->has_llc) {
      drm_intel_bo_map(brw->batch.bo, true);
      brw->batch.map = brw->batch.bo->virtual;
   }

   brw->batch.reserved_space = BATCH_RESERVED;
   brw->batch.state_batch_offset = brw->batch.bo->size;
   brw->batch.used = 0;
   brw->batch.needs_sol_reset = false;
}

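/**
 * Record how much of the batch is used and how many relocations it holds,
 * so that intel_batchbuffer_reset_to_saved() can roll back to this point.
 */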
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
   brw->batch.saved.used = brw->batch.used;
   brw->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}

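/**
 * Roll the batch back to the last intel_batchbuffer_save_state() point,
 * dropping any relocations and commands emitted since then.
 */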
void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
   drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);

   brw->batch.used = brw->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(brw);
}

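/** Release the CPU shadow map and every BO owned by the batchbuffer. */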
void
intel_batchbuffer_free(struct brw_context *brw)
{
   free(brw->batch.cpu_map);
   drm_intel_bo_unreference(brw->batch.last_bo);
   drm_intel_bo_unreference(brw->batch.bo);
   drm_intel_bo_unreference(brw->batch.workaround_bo);
   clear_cache(brw);
}

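/**
 * Decode and print the current batch with libdrm's decoder (INTEL_DEBUG=batch).
 * Prefers the contents of the mapped batch BO; if mapping fails, falls back
 * to dumping the local copy of the data that was uploaded.
 */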
static void
do_batch_dump(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &brw->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      brw_debug_batch(brw);
   }
}

/* TODO: Push this whole function into bufmgr.
 */
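/* Upload the accumulated batch contents (unmap on LLC, bo_subdata otherwise)
 * and submit them on the render or blit ring with the appropriate execbuf
 * flags.  A failed submission is fatal: the error is printed and the process
 * exits.
 */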
static int
do_flush_locked(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct intel_batchbuffer *batch = &brw->batch;
   int ret = 0;

   if (intel->has_llc) {
      drm_intel_bo_unmap(batch->bo);
   } else {
      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }
   }

   if (!intel->intelScreen->no_hw) {
      int flags;

      if (intel->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB))
            brw_annotate_aub(brw);
         if (brw->hw_ctx == NULL || batch->is_blit) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
                                                4 * batch->used, flags);
         }
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(brw);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   brw->vtbl.new_batch(brw);

   return ret;
}

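/**
 * Finish and submit the current batch: run the finish_batch hook, terminate
 * the command stream with MI_BATCH_BUFFER_END (padded to an even number of
 * dwords), flush it via do_flush_locked(), and reset for the next batch.
 */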
int
_intel_batchbuffer_flush(struct brw_context *brw,
                         const char *file, int line)
{
   int ret;

   if (brw->batch.used == 0)
      return 0;

   if (brw->first_post_swapbuffers_batch == NULL) {
      brw->first_post_swapbuffers_batch = brw->batch.bo;
      drm_intel_bo_reference(brw->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*brw->batch.used);

   brw->batch.reserved_space = 0;

   if (brw->vtbl.finish_batch)
      brw->vtbl.finish_batch(brw);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
   if (brw->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(brw, MI_NOOP);
   }

   intel_upload_finish(brw);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!brw->no_batch_wrap);

   ret = do_flush_locked(brw);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(brw->batch.bo);
   }

   /* Reset the buffer. */
   intel_batchbuffer_reset(brw);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct brw_context *brw,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel.
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

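/* Like intel_batchbuffer_emit_reloc(), but emits a fenced relocation for
 * buffers that the kernel must access through a fence register.
 */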
bool
intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(brw->batch.bo, 4*brw->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel.
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

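/**
 * Copy a dword-aligned block of commands into the batch, making sure there
 * is enough space (on the requested ring) before writing.
 */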
void
intel_batchbuffer_data(struct brw_context *brw,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(brw, bytes, is_blit);
   __memcpy(brw->batch.map + brw->batch.used, data, bytes);
   brw->batch.used += bytes >> 2;
}

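/**
 * Close out a packet written between batch.emit and batch.used, dropping it
 * if it exactly matches the cached packet with the same opcode; otherwise
 * the new packet is recorded as the cached copy for that opcode.
 */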
void
intel_batchbuffer_cached_advance(struct brw_context *brw)
{
   struct cached_batch_item **prev = &brw->batch.cached_items, *item;
   uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t);
   uint32_t *start = brw->batch.map + brw->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = brw->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &brw->batch.cached_items) {
               *prev = item->next;
               item->next = brw->batch.cached_items;
               brw->batch.cached_items = item;
            }
            brw->batch.used = brw->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = brw->batch.cached_items;
   brw->batch.cached_items = item;

 emit:
   item->size = sz;
   item->header = brw->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * From the BSpec, volume 2a.03: VS Stage Input / State:
 * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 * 3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
 * to be sent before any combination of VS associated 3DSTATE."
 */
void
gen7_emit_vs_workaround_flush(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   assert(intel->gen == 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
   if (!brw->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   brw->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   if (intel->gen >= 6) {
      if (brw->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(brw);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   }
}