i965: Move intel_context::bufmgr to brw_context.
[mesa.git] src/mesa/drivers/dri/i965/intel_batchbuffer.c
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "brw_context.h"

static void
intel_batchbuffer_reset(struct brw_context *brw);

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

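/* Free the list of cached batch items used to detect redundant state
 * packets (see intel_batchbuffer_cached_advance() below).
 */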
static void
clear_cache(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

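/* One-time batchbuffer setup for a new context: allocates the first batch
 * BO, the gen6+ PIPE_CONTROL workaround BO, and a CPU staging map on
 * non-LLC platforms where the batch BO itself is not kept mapped.
 */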
void
intel_batchbuffer_init(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   intel_batchbuffer_reset(brw);

   if (intel->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
                                                      "pipe_control workaround",
                                                      4096, 4096);
   }

   if (!intel->has_llc) {
      intel->batch.cpu_map = malloc(BATCH_SZ);
      intel->batch.map = intel->batch.cpu_map;
   }
}

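/* Start a fresh batch: drop the previous last_bo reference, remember the
 * just-finished BO as last_bo, allocate a new batch BO (mapped on LLC
 * platforms), and reset the bookkeeping (used dwords, state space offset,
 * reserved space).
 */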
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(brw);

   intel->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
                                        BATCH_SZ, 4096);
   if (intel->has_llc) {
      drm_intel_bo_map(intel->batch.bo, true);
      intel->batch.map = intel->batch.bo->virtual;
   }

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
   intel->batch.needs_sol_reset = false;
}

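/* Snapshot the current batch position and relocation count so a caller can
 * roll back everything emitted after this point with
 * intel_batchbuffer_reset_to_saved().
 */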
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(brw);
}

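/* Tear down everything set up by intel_batchbuffer_init() and
 * intel_batchbuffer_reset().
 */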
void
intel_batchbuffer_free(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   free(intel->batch.cpu_map);
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(brw);
}

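/* Decode and print the current batch with libdrm's decoder, for
 * INTEL_DEBUG=batch.  Prefers the uploaded BO contents and falls back to
 * the CPU copy if mapping the BO fails.
 */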
static void
do_batch_dump(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      brw_debug_batch(brw);
   }
}

/* TODO: Push this whole function into bufmgr.
 */
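/* Upload the accumulated commands (by unmapping the LLC-mapped BO, or via
 * drm_intel_bo_subdata on non-LLC platforms), pick the render or blit ring,
 * and submit the batch to the kernel.
 */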
static int
do_flush_locked(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   if (intel->has_llc) {
      drm_intel_bo_unmap(batch->bo);
   } else {
      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }
   }

   if (!intel->intelScreen->no_hw) {
      int flags;

      if (intel->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0) {
         if (unlikely(INTEL_DEBUG & DEBUG_AUB))
            brw_annotate_aub(brw);
         if (intel->hw_ctx == NULL || batch->is_blit) {
            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
                                        flags);
         } else {
            ret = drm_intel_gem_bo_context_exec(batch->bo, intel->hw_ctx,
                                                4 * batch->used, flags);
         }
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(brw);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   brw->vtbl.new_batch(brw);

   return ret;
}

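/* Finish the current batch: mark the end with MI_BATCH_BUFFER_END (padded
 * with MI_NOOP to an even dword count), hand it to the kernel through
 * do_flush_locked(), and start a fresh batchbuffer.  The file/line
 * arguments are only used for INTEL_DEBUG=batch output.
 */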
int
_intel_batchbuffer_flush(struct brw_context *brw,
                         const char *file, int line)
{
   struct intel_context *intel = &brw->intel;
   int ret;

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (brw->vtbl.finish_batch)
      brw->vtbl.finish_batch(brw);

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(brw, MI_NOOP);
   }

   intel_upload_finish(brw);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(brw);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer. */
   intel_batchbuffer_reset(brw);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct brw_context *brw,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   struct intel_context *intel = &brw->intel;
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel.
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

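/* Same as intel_batchbuffer_emit_reloc(), but marks the relocation as
 * requiring a fence register for the target buffer.
 */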
bool
intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   struct intel_context *intel = &brw->intel;
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel.
    */
   intel_batchbuffer_emit_dword(brw, buffer->offset + delta);

   return true;
}

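/* Copy a block of pre-built, dword-aligned commands into the batch,
 * reserving space on the appropriate ring first.
 */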
void
intel_batchbuffer_data(struct brw_context *brw,
                       const void *data, GLuint bytes, bool is_blit)
{
   struct intel_context *intel = &brw->intel;
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(brw, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

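/* Close out the packet currently being built and compare it against the
 * cached copy of the last packet emitted with the same opcode.  If it is
 * identical, rewind batch.used so the duplicate is dropped; otherwise
 * remember this packet for future comparisons.
 */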
void
intel_batchbuffer_cached_advance(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * From the BSpec, volume 2a.03: VS Stage Input / State:
 * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
 *  to be sent before any combination of VS associated 3DSTATE."
 */
void
gen7_emit_vs_workaround_flush(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   assert(intel->gen == 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(brw);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   }
}